提交 5160a127 编写于 作者: G gineshidalgo99

CPU_ONLY by ONLY_CUDA & flags not required at deployment

上级 db3eaffe
### Posting rules
1. **Add** the **system configuration (all of it!), command and output** if you have some kind of error or performance question.
2. **No duplicated** posts.
3. **No** posts about **questions already answered / clearly explained in** the **documentation** (e.g. **no more low-speed nor out-of-memory questions**).
4. Set a **proper issue title**: add the Ubuntu/Windows word and be specific (e.g. do not simply call it: `Compile error`).
5. **No** questions about **training**. OpenPose only implements testing.
6. Only English comments.
Issues/comments that do not follow this will be **ignored or removed** with no further clarification.
1. **No** questions about **training**. OpenPose only implements testing.
2. **No** questions about **Caffe installation errors/issues**. Check [Caffe](http://caffe.berkeleyvision.org) documentation and help for those errors.
3. **Fill** the **Your System Configuration section (all of it!)** if you have some kind of error or performance question.
4. **No duplicated** posts.
5. **No** posts about **questions already answered / clearly explained in** the **documentation** (e.g. **no more low-speed nor out-of-memory questions**).
6. Set a **proper issue title**: add the Ubuntu/Windows word and be specific (e.g. do not simply call it: `Compile error`).
7. Only English comments.
Issues/comments which do not follow these rules will be **ignored or removed** with no further clarification.
### Issue summary
### Issue Summary
### Executed command (if any)
### Executed Command (if any)
Note: add `--logging_level 0` to get higher debug information.
### OpenPose output (if any)
### OpenPose Output (if any)
### Type of issue
### Type of Issue
You might select multiple topics, delete the rest:
- Compilation/installation error
- Execution error
......@@ -33,13 +34,12 @@ You might select multiple topics, delete the rest:
### Your system configuration
**Installation mode**: CMake or sh script or manual Makefile installation.
### Your System Configuration
**Operating system** (`lsb_release -a` in Ubuntu):
**Installation mode**: CMake, sh script, or manual Makefile installation (Ubuntu); VS2015, VS2017, CMake, ... (Windows)
**CUDA version** (`cat /usr/local/cuda/version.txt` in most cases):
**cuDNN version**:
**GPU model** (`nvidia-smi` in Ubuntu):
**Caffe version**: Default from OpenPose or custom version.
**OpenCV version**: installed with `apt-get install libopencv-dev` (Ubuntu) or default from OpenPose (Windows) or OpenCV 2.X or OpenCV 3.X. Specify the **full version** (e.g. 3.1 or 2.4.9)
Generation mode (only for Ubuntu): Makefile + Makefile.config (default, Ubuntu) or CMake (Ubuntu, Windows) or Visual Studio (Windows).
**OpenCV version**: pre-compiled `apt-get install libopencv-dev` (only Ubuntu); OpenPose default (only Windows); compiled from source: 2.4.9, 2.4.12, 3.1, 3.2, ...
Compiler (`gcc --version` in Ubuntu):
......@@ -28,9 +28,6 @@ set(BUILD_SHARED_LIBS ON)
# Turn on C++11
add_definitions(-std=c++11)
# OpenPose flags
add_definitions(-DUSE_CAFFE)
# C++ additional flags
set(OP_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -Wpedantic -Wall -Wextra -Wfatal-errors")
......@@ -46,12 +43,21 @@ include(CMakeDependentOption)
if (${DL_FRAMEWORK} MATCHES "CAFFE")
CMAKE_DEPENDENT_OPTION(BUILD_CAFFE "Build Caffe as part of OpenPose." ON
"DL_FRAMEWORK" ON)
# OpenPose flags
add_definitions(-DUSE_CAFFE)
endif (${DL_FRAMEWORK} MATCHES "CAFFE")
# Set the acceleration library
set(GPU_MODE CUDA CACHE STRING "Select the acceleration GPU library or CPU otherwise.")
set_property(CACHE GPU_MODE PROPERTY STRINGS CUDA)
# set_property(CACHE GPU_MODE PROPERTY STRINGS CUDA OPENCL CPU_ONLY)
if (${GPU_MODE} MATCHES "CUDA")
# OpenPose flags
add_definitions(-DUSE_CUDA)
elseif (${GPU_MODE} MATCHES "CPU_ONLY")
# OpenPose flag for Caffe
add_definitions(-DCPU_ONLY)
endif ()
# Suboptions for GPU architectures
if (${GPU_MODE} MATCHES "CUDA")
......
......@@ -139,7 +139,7 @@ endif
CUDA_LIB_DIR += $(CUDA_DIR)/lib
INCLUDE_DIRS += $(BUILD_INCLUDE_DIR) ./src ./include
ifneq ($(CPU_ONLY), 1)
ifeq ($(USE_CUDA), 1)
INCLUDE_DIRS += $(CUDA_INCLUDE_DIR)
LIBRARY_DIRS += $(CUDA_LIB_DIR)
LIBRARIES += cudart cublas curand
......@@ -251,7 +251,7 @@ endif
# libstdc++ for NVCC compatibility on OS X >= 10.9 with CUDA < 7.0
ifeq ($(OSX), 1)
CXX := /usr/bin/clang++
ifneq ($(CPU_ONLY), 1)
ifeq ($(USE_CUDA), 1)
CUDA_VERSION := $(shell $(CUDA_DIR)/bin/nvcc -V | grep -o 'release [0-9.]*' | tr -d '[a-z ]')
ifeq ($(shell echo | awk '{exit $(CUDA_VERSION) < 7.0;}'), 1)
CXXFLAGS += -stdlib=libstdc++
......@@ -300,12 +300,6 @@ else
COMMON_FLAGS += -DNDEBUG -O3
endif
# cuDNN acceleration configuration.
ifeq ($(USE_CUDNN), 1)
LIBRARIES += cudnn
COMMON_FLAGS += -DUSE_CUDNN
endif
# configure IO libraries
ifeq ($(USE_OPENCV), 1)
COMMON_FLAGS += -DUSE_OPENCV
......@@ -321,52 +315,11 @@ endif
endif
# CPU-only configuration
ifeq ($(CPU_ONLY), 1)
OBJS := $(CXX_OBJS)
ALL_WARNS := $(ALL_CXX_WARNS)
COMMON_FLAGS += -DCPU_ONLY
endif
# BLAS configuration (default = ATLAS)
BLAS ?= atlas
ifeq ($(BLAS), mkl)
# MKL
LIBRARIES += mkl_rt
COMMON_FLAGS += -DUSE_MKL
MKLROOT ?= /opt/intel/mkl
BLAS_INCLUDE ?= $(MKLROOT)/include
BLAS_LIB ?= $(MKLROOT)/lib $(MKLROOT)/lib/intel64
else ifeq ($(BLAS), open)
# OpenBLAS
LIBRARIES += openblas
ifeq ($(USE_CUDA), 1)
COMMON_FLAGS += -DUSE_CUDA
else
# ATLAS
ifeq ($(LINUX), 1)
ifeq ($(BLAS), atlas)
# Linux simply has cblas and atlas
LIBRARIES += cblas atlas
endif
else ifeq ($(OSX), 1)
# OS X packages atlas as the vecLib framework
LIBRARIES += cblas
# 10.10 has accelerate while 10.9 has veclib
XCODE_CLT_VER := $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables | grep 'version' | sed 's/[^0-9]*\([0-9]\).*/\1/')
XCODE_CLT_GEQ_7 := $(shell [ $(XCODE_CLT_VER) -gt 6 ] && echo 1)
XCODE_CLT_GEQ_6 := $(shell [ $(XCODE_CLT_VER) -gt 5 ] && echo 1)
ifeq ($(XCODE_CLT_GEQ_7), 1)
BLAS_INCLUDE ?= /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/$(shell ls /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/ | sort | tail -1)/System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/Headers
else ifeq ($(XCODE_CLT_GEQ_6), 1)
BLAS_INCLUDE ?= /System/Library/Frameworks/Accelerate.framework/Versions/Current/Frameworks/vecLib.framework/Headers/
LDFLAGS += -framework Accelerate
else
BLAS_INCLUDE ?= /System/Library/Frameworks/vecLib.framework/Versions/Current/Headers/
LDFLAGS += -framework vecLib
endif
endif
COMMON_FLAGS += -DCPU_ONLY # For Caffe
endif
#'
INCLUDE_DIRS += $(BLAS_INCLUDE)
LIBRARY_DIRS += $(BLAS_LIB)
LIBRARY_DIRS += $(LIB_BUILD_DIR)
......
......@@ -107,13 +107,13 @@ Note: you should not need to modify the OpenPose source code nor examples. In th
### OpenPose Library
Your case if you want to change internal functions and/or extend its functionality. First, take a look at the [Demo](#demo) and [OpenPose Wrapper](#openpose-wrapper). Second, read the 2 following subsections: OpenPose Overview and Extending Functionality.
Your case if you want to change internal functions and/or extend its functionality.
1. OpenPose Overview: Learn the basics about the library source code in [doc/library_overview.md](doc/library_overview.md).
2. Extending Functionality: Learn how to extend the library in [doc/library_extend_functionality.md](doc/library_extend_functionality.md).
3. Adding An Extra Module: Learn how to add an extra module in [doc/library_add_new_module.md](doc/library_add_new_module.md).
1. Take a look at the [Demo](#demo) and [OpenPose Wrapper](#openpose-wrapper).
2. OpenPose Overview: Learn the basics about the library source code in [doc/library_overview.md](doc/library_overview.md).
3. Extending Functionality: Learn how to extend the library in [doc/library_extend_functionality.md](doc/library_extend_functionality.md).
4. Adding An Extra Module: Learn how to add an extra module in [doc/library_add_new_module.md](doc/library_add_new_module.md).
5. See the Doxygen documentation on [http://cmu-perceptual-computing-lab.github.io/openpose/html/index.html](http://cmu-perceptual-computing-lab.github.io/openpose/html/index.html) or build it from the source code.
......
......@@ -18,7 +18,7 @@ OpenPose - Installation and FAQ
## Operating Systems
- **Ubuntu** 14 and 16.
- **Windows** 8 and 10.
- **Nvidia Jetson TX2**, installation instructions in [doc/installation_jetson_tx2](./installation_jetson_tx2).
- **Nvidia Jetson TX2**, installation instructions in [doc/installation_jetson_tx2.md](./installation_jetson_tx2.md).
- OpenPose has also been used on **Windows 7**, **Mac**, **CentOS**, and **Nvidia Jetson (TK1 and TX1)** embedded systems. However, we do not officially support them at the moment.
......@@ -133,7 +133,7 @@ You just need to remove the OpenPose folder, by default called `openpose/`. E.g.
### Installation - Library
1. Install the pre-requisites:
1. Microsoft Visual Studio (VS) 2015 Enterprise Update 3. VS Enterprise Update 1 and VS 2017 will give some compiler errors, while VS 2015 Community has not been tested.
1. Microsoft Visual Studio (VS) 2015 Enterprise Update 3. If Visual Studio 2017 Community is desired, we do not support it, but it might be compiled by firstly [enabling CUDA 8.0](https://stackoverflow.com/questions/43745099/using-cuda-with-visual-studio-2017?answertab=active#tab-top). VS Enterprise Update 1 will give some compiler errors and VS 2015 Community has not been tested.
2. [CUDA 8](https://developer.nvidia.com/cuda-downloads): Install it on the default location, `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0`. Otherwise, modify the Visual Studio project solution accordingly. Install CUDA 8.0 after Visual Studio 2015 is installed to assure that the CUDA installation will generate all necessary files for VS. If CUDA was already installed, re-install it after installing VS!
3. [cuDNN 5.1](https://developer.nvidia.com/cudnn): Once you have downloaded it, just unzip it and copy (merge) the contents on the CUDA folder, `C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v8.0`.
2. Download the OpenPose dependencies and models (body, face and hand models) by double-clicking on `{openpose_path}\windows\download_3rdparty_and_models.bat`. Alternatively, you might prefer to download them manually:
......@@ -214,8 +214,8 @@ We only modified some Caffe compilation flags and minor details. You can use you
## Compiling without cuDNN
The [cuDNN](https://developer.nvidia.com/cudnn) library is not mandatory, but required for full keypoint detection accuracy. In case your graphics card is not compatible with cuDNN, you can disable it by:
- Ubuntu: Modifying the `Makefile.config` files in both the OpenPose and `3rdparty/caffe` folders, disabling `USE_CUDNN`.
- Windows: Compiling Caffe on your own without cuDNN support and removing the `USE_CUDN` define from the OpenPose project solution in Visual Studio.
- Ubuntu: Disable `USE_CUDNN` in the `Makefile.config` file in `3rdparty/caffe`, and recompiling Caffe.
- Windows: Compiling Caffe on your own without cuDNN support and replacing the [3rdparty/windows/caffe](../3rdparty/windows/caffe) folder by your own implementation.
Then, you would have to reduce the `--net_resolution` flag to fit the model into the GPU memory. You can try values like "640x320", "320x240", "320x160", or "160x80" to see your GPU memory capabilities. After finding the maximum approximate resolution that your GPU can handle without throwing an out-of-memory error, adjust the `net_resolution` ratio to your image or video to be processed (see the `--net_resolution` explanation from [doc/demo_overview.md](./demo_overview.md)).
......
......@@ -60,7 +60,7 @@ The program uses 3 cameras by default, but cameras can be added or removed from
- (Optional) Spinnaker SDK overview: https://www.ptgrey.com/spinnaker-sdk
6. Get the last OpenGL Glut library version for the rendering:
- Download the latest `MSVC Package` from http://www.transmissionzero.co.uk/software/freeglut-devel/
- Copy `{freeglutParentDirectory}\freeglut\bin\x64\` as `{OpenPoseDirectory}\3rdparty\windows\freeglut\bin\bin\`.
- Copy `{freeglutParentDirectory}\freeglut\bin\x64\` as `{OpenPoseDirectory}\3rdparty\windows\freeglut\bin\`.
- Copy `{freeglutParentDirectory}\freeglut\include\` as `{OpenPoseDirectory}\3rdparty\windows\freeglut\include\`.
- Copy `{freeglutParentDirectory}\freeglut\lib\x64\` as `{OpenPoseDirectory}\3rdparty\windows\freeglut\lib\`.
......@@ -78,7 +78,6 @@ We did not create an Ubuntu version. We did a very first version for Ubuntu 16
8. Get the required files from `{OpenPose path}/examples_beta/openpose3d/`. Check the Windows VS solution for more details.
9. Create a proper Makefile or CMake file to run it. The following code is part of an old QMake (Qt) file generated for the old version, you can ideally get all the flags and includes from it:
```
DEFINES += USE_CAFFE USE_CUDNN
INCLUDEPATH += \
$$PWD/include \
$$PWD/3rdparty/caffe/include \
......
......@@ -122,14 +122,18 @@ OpenPose Library - Release Notes
3. CvMatToOutput and Renderers allow to keep input resolution as output for images (core module).
3. New standalone face keypoint detector based on OpenCV face detector: much faster if body keypoint detection is not required but much less accurate.
4. Face and hand keypoint detectors now can return each keypoint heatmap.
5. COCO JSON file outputs 0 as score for non-detected keypoints.
6. Added example for OpenPose for user asynchronous output and cleaned all `tutorial_wrapper/` examples.
7. Added `-1` option for `net_resolution` in order to auto-select the best possible aspect ratio given the user input.
5. The flag `USE_CUDNN` is no longer required; `USE_CAFFE` and `USE_CUDA` (replacing the old `CPU_ONLY`) are no longer required to use the library, only to build it. In addition, Caffe and its dependencies have been removed from the OpenPose header files. Only OpenCV include and lib folders are required when building a project using OpenPose.
6. OpenPose successfully compiles if the flags `USE_CAFFE` and/or `USE_CUDA` are not enabled, although it will give an error saying they are required.
7. COCO JSON file outputs 0 as score for non-detected keypoints.
8. Added example for OpenPose for user asynchronous output and cleaned all `tutorial_wrapper/` examples.
9. Added `-1` option for `net_resolution` in order to auto-select the best possible aspect ratio given the user input.
2. Functions or parameters renamed:
1. OpenPose able to change its size and initial size:
1. Flag `resolution` renamed as `output_resolution`.
2. FrameDisplayer, GuiInfoAdder and Gui constructors arguments modified (gui module).
3. OpOutputToCvMat constructor removed (core module).
4. New Renders classes to split GpuRenderers from CpuRenderers.
5. Etc.
2. `CPU_ONLY` changed by `USE_CUDA` to keep format.
3. Main bugs fixed:
1. Ubuntu installer script now works even if Python pip was not installed previously.
......@@ -173,12 +173,13 @@ namespace op
wDatumProducer = std::make_shared<WDatumProducer<TDatumsPtr, TDatums>>(datumProducer);
// Input cvMat to OpenPose format
const auto cvMatToOpInput = std::make_shared<CvMatToOpInput>(
wrapperStructPose.netInputSize, wrapperStructPose.scalesNumber, wrapperStructPose.scaleGap
);
const auto cvMatToOpInput = std::make_shared<CvMatToOpInput>();
spWCvMatToOpInput = std::make_shared<WCvMatToOpInput<TDatumsPtr>>(cvMatToOpInput);
const auto cvMatToOpOutput = std::make_shared<CvMatToOpOutput>(finalOutputSize, displayGui);
spWCvMatToOpOutput = std::make_shared<WCvMatToOpOutput<TDatumsPtr>>(cvMatToOpOutput);
if (displayGui)
{
const auto cvMatToOpOutput = std::make_shared<CvMatToOpOutput>();
spWCvMatToOpOutput = std::make_shared<WCvMatToOpOutput<TDatumsPtr>>(cvMatToOpOutput);
}
// Hand extractor(s)
if (wrapperStructHand.enable)
......@@ -196,7 +197,7 @@ namespace op
spWPoses.at(gpuId) = {std::make_shared<WHandDetectorFromTxt<TDatumsPtr>>(handDetector)};
// Hand keypoint extractor
const auto netOutputSize = wrapperStructHand.netInputSize;
const auto handExtractor = std::make_shared<HandExtractor>(
const auto handExtractor = std::make_shared<HandExtractorCaffe>(
wrapperStructHand.netInputSize, netOutputSize, wrapperStructPose.modelFolder,
gpuId + gpuNumberStart, wrapperStructHand.scalesNumber, wrapperStructHand.scaleRange
);
......@@ -304,7 +305,7 @@ namespace op
try
{
// Security checks
if (spWCvMatToOpInput == nullptr || spWCvMatToOpOutput == nullptr)
if (spWCvMatToOpInput == nullptr)
error("Configure the WrapperHandFromJsonTest class before calling `start()`.", __LINE__, __FUNCTION__, __FILE__);
if (wDatumProducer == nullptr)
{
......@@ -325,9 +326,14 @@ namespace op
// If custom user Worker in same thread or producer on same thread
spWIdGenerator = std::make_shared<WIdGenerator<std::shared_ptr<TDatums>>>();
// OpenPose producer
mThreadManager.add(threadId++, {wDatumProducer, spWIdGenerator, spWCvMatToOpInput, spWCvMatToOpOutput}, queueIn++, queueOut++); // Thread 0 or 1, queues 0 -> 1
// Thread 0 or 1, queues 0 -> 1
if (spWCvMatToOpOutput == nullptr)
mThreadManager.add(threadId++, {wDatumProducer, spWIdGenerator, spWCvMatToOpInput}, queueIn++, queueOut++);
else
mThreadManager.add(threadId++, {wDatumProducer, spWIdGenerator, spWCvMatToOpInput, spWCvMatToOpOutput}, queueIn++, queueOut++);
// Pose estimation & rendering
if (!spWPoses.empty()) // Thread 1 or 2...X, queues 1 -> 2, X = 2 + #GPUs
// Thread 1 or 2...X, queues 1 -> 2, X = 2 + #GPUs
if (!spWPoses.empty())
{
for (auto& wPose : spWPoses)
mThreadManager.add(threadId++, wPose, queueIn, queueOut);
......@@ -336,10 +342,12 @@ namespace op
}
// If custom user Worker in same thread or producer on same thread
// Post processing workers + User post processing workers + Output workers
mThreadManager.add(threadId++, mergeWorkers(mPostProcessingWs, mOutputWs), queueIn++, queueOut++); // Thread 2 or 3, queues 2 -> 3
// Thread 2 or 3, queues 2 -> 3
mThreadManager.add(threadId++, mergeWorkers(mPostProcessingWs, mOutputWs), queueIn++, queueOut++);
// OpenPose GUI
// Thread Y+1, queues Q+1 -> Q+2
if (spWGui != nullptr)
mThreadManager.add(threadId++, spWGui, queueIn++, queueOut++); // Thread Y+1, queues Q+1 -> Q+2
mThreadManager.add(threadId++, spWGui, queueIn++, queueOut++);
log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
catch (const std::exception& e)
......
......@@ -62,7 +62,8 @@ int openPoseTutorialPose1()
// Step 1 - Set logging level
// - 0 will output all the logging messages
// - 255 will output nothing
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__);
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
op::log("", op::Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Step 2 - Read Google flags (user defined configuration)
......@@ -78,15 +79,18 @@ int openPoseTutorialPose1()
if (FLAGS_alpha_pose < 0. || FLAGS_alpha_pose > 1.)
op::error("Alpha value for blending must be in the range [0,1].", __LINE__, __FUNCTION__, __FILE__);
if (FLAGS_scale_gap <= 0. && FLAGS_scale_number > 1)
op::error("Incompatible flag configuration: scale_gap must be greater than 0 or scale_number = 1.", __LINE__, __FUNCTION__, __FILE__);
op::error("Incompatible flag configuration: scale_gap must be greater than 0 or scale_number = 1.",
__LINE__, __FUNCTION__, __FILE__);
// Logging
op::log("", op::Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Step 3 - Initialize all required classes
op::CvMatToOpInput cvMatToOpInput{netInputSize, FLAGS_scale_number, (float)FLAGS_scale_gap};
op::CvMatToOpOutput cvMatToOpOutput{outputSize};
op::ScaleAndSizeExtractor scaleAndSizeExtractor(netInputSize, outputSize, FLAGS_scale_number, FLAGS_scale_gap);
op::CvMatToOpInput cvMatToOpInput;
op::CvMatToOpOutput cvMatToOpOutput;
op::PoseExtractorCaffe poseExtractorCaffe{netInputSize, netOutputSize, outputSize, FLAGS_scale_number, poseModel,
FLAGS_model_folder, FLAGS_num_gpu_start};
op::PoseCpuRenderer poseRenderer{poseModel, (float)FLAGS_render_threshold, !FLAGS_disable_blending, (float)FLAGS_alpha_pose};
op::PoseCpuRenderer poseRenderer{poseModel, (float)FLAGS_render_threshold, !FLAGS_disable_blending,
(float)FLAGS_alpha_pose};
op::OpOutputToCvMat opOutputToCvMat;
op::FrameDisplayer frameDisplayer{"OpenPose Tutorial - Example 1", outputSize};
// Step 4 - Initialize resources on desired thread (in this case single thread, i.e. we init resources here)
......@@ -95,22 +99,27 @@ int openPoseTutorialPose1()
// ------------------------- POSE ESTIMATION AND RENDERING -------------------------
// Step 1 - Read and load image, error if empty (possibly wrong path)
cv::Mat inputImage = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_COLOR); // Alternative: cv::imread(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
// Alternative: cv::imread(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
cv::Mat inputImage = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
if(inputImage.empty())
op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
// Step 2 - Format input image to OpenPose input and output formats
op::Array<float> netInputArray;
std::vector<float> scaleRatios;
std::tie(netInputArray, scaleRatios) = cvMatToOpInput.format(inputImage);
const op::Point<int> imageSize{inputImage.cols, inputImage.rows};
// Step 2 - Get desired scale sizes
std::vector<double> scaleInputToNetInputs;
std::vector<op::Point<int>> netInputSizes;
double scaleInputToOutput;
op::Array<float> outputArray;
std::tie(scaleInputToOutput, outputArray) = cvMatToOpOutput.format(inputImage);
// Step 3 - Estimate poseKeypoints
poseExtractorCaffe.forwardPass(netInputArray, {inputImage.cols, inputImage.rows}, scaleRatios);
op::Point<int> outputResolution;
std::tie(scaleInputToNetInputs, netInputSizes, scaleInputToOutput, outputResolution)
= scaleAndSizeExtractor.extract(imageSize);
// Step 3 - Format input image to OpenPose input and output formats
const auto netInputArray = cvMatToOpInput.createArray(inputImage, scaleInputToNetInputs, netInputSizes);
auto outputArray = cvMatToOpOutput.createArray(inputImage, scaleInputToOutput, outputResolution);
// Step 4 - Estimate poseKeypoints
poseExtractorCaffe.forwardPass(netInputArray, imageSize, scaleInputToNetInputs);
const auto poseKeypoints = poseExtractorCaffe.getPoseKeypoints();
// Step 4 - Render poseKeypoints
// Step 5 - Render poseKeypoints
poseRenderer.renderPose(outputArray, poseKeypoints);
// Step 5 - OpenPose output format to cv::Mat
// Step 6 - OpenPose output format to cv::Mat
auto outputImage = opOutputToCvMat.formatToCvMat(outputArray);
// ------------------------- SHOWING RESULT AND CLOSING -------------------------
......
......@@ -67,7 +67,8 @@ int openPoseTutorialPose2()
// Step 1 - Set logging level
// - 0 will output all the logging messages
// - 255 will output nothing
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__);
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
op::log("", op::Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Step 2 - Read Google flags (user defined configuration)
......@@ -83,16 +84,18 @@ int openPoseTutorialPose2()
if (FLAGS_alpha_pose < 0. || FLAGS_alpha_pose > 1.)
op::error("Alpha value for blending must be in the range [0,1].", __LINE__, __FUNCTION__, __FILE__);
if (FLAGS_scale_gap <= 0. && FLAGS_scale_number > 1)
op::error("Incompatible flag configuration: scale_gap must be greater than 0 or scale_number = 1.", __LINE__, __FUNCTION__, __FILE__);
op::error("Incompatible flag configuration: scale_gap must be greater than 0 or scale_number = 1.",
__LINE__, __FUNCTION__, __FILE__);
// Logging
op::log("", op::Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Step 3 - Initialize all required classes
op::CvMatToOpInput cvMatToOpInput{netInputSize, FLAGS_scale_number, (float)FLAGS_scale_gap};
op::CvMatToOpOutput cvMatToOpOutput{outputSize};
std::shared_ptr<op::PoseExtractor> poseExtractorPtr = std::make_shared<op::PoseExtractorCaffe>(netInputSize, netOutputSize, outputSize,
FLAGS_scale_number, poseModel,
FLAGS_model_folder, FLAGS_num_gpu_start);
op::PoseGpuRenderer poseGpuRenderer{netOutputSize, poseModel, poseExtractorPtr, (float)FLAGS_render_threshold,
op::ScaleAndSizeExtractor scaleAndSizeExtractor(netInputSize, outputSize, FLAGS_scale_number, FLAGS_scale_gap);
op::CvMatToOpInput cvMatToOpInput;
op::CvMatToOpOutput cvMatToOpOutput;
auto poseExtractorPtr = std::make_shared<op::PoseExtractorCaffe>(
netInputSize, netOutputSize, outputSize, FLAGS_scale_number, poseModel, FLAGS_model_folder, FLAGS_num_gpu_start
);
op::PoseGpuRenderer poseGpuRenderer{poseModel, poseExtractorPtr, (float)FLAGS_render_threshold,
!FLAGS_disable_blending, (float)FLAGS_alpha_pose, (float)FLAGS_alpha_heatmap};
poseGpuRenderer.setElementToRender(FLAGS_part_to_show);
op::OpOutputToCvMat opOutputToCvMat;
......@@ -103,23 +106,28 @@ int openPoseTutorialPose2()
// ------------------------- POSE ESTIMATION AND RENDERING -------------------------
// Step 1 - Read and load image, error if empty (possibly wrong path)
cv::Mat inputImage = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_COLOR); // Alternative: cv::imread(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
// Alternative: cv::imread(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
cv::Mat inputImage = op::loadImage(FLAGS_image_path, CV_LOAD_IMAGE_COLOR);
if(inputImage.empty())
op::error("Could not open or find the image: " + FLAGS_image_path, __LINE__, __FUNCTION__, __FILE__);
// Step 2 - Format input image to OpenPose input and output formats
op::Array<float> netInputArray;
std::vector<float> scaleRatios;
std::tie(netInputArray, scaleRatios) = cvMatToOpInput.format(inputImage);
const op::Point<int> imageSize{inputImage.cols, inputImage.rows};
// Step 2 - Get desired scale sizes
std::vector<double> scaleInputToNetInputs;
std::vector<op::Point<int>> netInputSizes;
double scaleInputToOutput;
op::Array<float> outputArray;
std::tie(scaleInputToOutput, outputArray) = cvMatToOpOutput.format(inputImage);
// Step 3 - Estimate poseKeypoints
poseExtractorPtr->forwardPass(netInputArray, {inputImage.cols, inputImage.rows}, scaleRatios);
op::Point<int> outputResolution;
std::tie(scaleInputToNetInputs, netInputSizes, scaleInputToOutput, outputResolution)
= scaleAndSizeExtractor.extract(imageSize);
// Step 3 - Format input image to OpenPose input and output formats
const auto netInputArray = cvMatToOpInput.createArray(inputImage, scaleInputToNetInputs, netInputSizes);
auto outputArray = cvMatToOpOutput.createArray(inputImage, scaleInputToOutput, outputResolution);
// Step 4 - Estimate poseKeypoints
poseExtractorPtr->forwardPass(netInputArray, imageSize, scaleInputToNetInputs);
const auto poseKeypoints = poseExtractorPtr->getPoseKeypoints();
const auto scaleNetToOutput = poseExtractorPtr->getScaleNetToOutput();
// Step 4 - Render pose
// Step 5 - Render pose
poseGpuRenderer.renderPose(outputArray, poseKeypoints, scaleNetToOutput);
// Step 5 - OpenPose output format to cv::Mat
// Step 6 - OpenPose output format to cv::Mat
auto outputImage = opOutputToCvMat.formatToCvMat(outputArray);
// ------------------------- SHOWING RESULT AND CLOSING -------------------------
......
......@@ -192,7 +192,7 @@ public:
{
cv::imshow("User worker GUI", datumsPtr->at(0).cvOutputData);
// Display image and sleeps at least 1 ms (it usually sleeps ~5-10 msec to display the image)
key = cv::waitKey(1);
key = (char)cv::waitKey(1);
}
else
op::log("Nullptr or empty datumsPtr found.", op::Priority::High, __LINE__, __FUNCTION__, __FILE__);
......
......@@ -318,7 +318,7 @@ public:
// Display rendered output image
cv::imshow("User worker GUI", datumsPtr->at(0).cvOutputData);
// Display image and sleeps at least 1 ms (it usually sleeps ~5-10 msec to display the image)
const char key = cv::waitKey(1);
const char key = (char)cv::waitKey(1);
if (key == 27)
this->stop();
}
......
......@@ -232,7 +232,7 @@ public:
{
cv::imshow("User worker GUI", datumsPtr->at(0).cvOutputData);
// Display image and sleeps at least 1 ms (it usually sleeps ~5-10 msec to display the image)
key = cv::waitKey(1);
key = (char)cv::waitKey(1);
}
else
op::log("Nullptr or empty datumsPtr found.", op::Priority::High, __LINE__, __FUNCTION__, __FILE__);
......
......@@ -3,7 +3,7 @@
// Std library most used classes
#include <array>
#include <memory> // std::shared_ptr
#include <memory> // std::shared_ptr, std::unique_ptr
#include <string>
#include <vector>
// OpenPose most used classes
......
#ifndef OPENPOSE_CORE_CV_MAT_TO_OP_INPUT_HPP
#define OPENPOSE_CORE_CV_MAT_TO_OP_INPUT_HPP
#include <utility> // std::pair
#include <opencv2/core/core.hpp> // cv::Mat
#include <openpose/core/common.hpp>
......@@ -10,14 +9,8 @@ namespace op
class OP_API CvMatToOpInput
{
public:
CvMatToOpInput(const Point<int>& netInputResolution, const int scaleNumber = 1, const float scaleGap = 0.25);
std::pair<Array<float>, std::vector<float>> format(const cv::Mat& cvInputData) const;
private:
const int mScaleNumber;
const float mScaleGap;
const std::vector<int> mInputNetSize4D;
Array<float> createArray(const cv::Mat& cvInputData, const std::vector<double>& scaleInputToNetInputs,
const std::vector<Point<int>>& netInputSizes) const;
};
}
......
......@@ -9,14 +9,8 @@ namespace op
class OP_API CvMatToOpOutput
{
public:
// Use outputResolution <= {0,0} to keep input resolution
CvMatToOpOutput(const Point<int>& outputResolution = Point<int>{0, 0}, const bool generateOutput = true);
std::tuple<double, Array<float>> format(const cv::Mat& cvInputData) const;
private:
const bool mGenerateOutput;
const std::vector<int> mOutputSize3D;
Array<float> createArray(const cv::Mat& cvInputData, const double scaleInputToOutput,
const Point<int>& outputResolution) const;
};
}
......
......@@ -8,17 +8,21 @@ namespace op
{
/**
* Datum: The OpenPose Basic Piece of Information Between Threads
* Datum is one of the main OpenPose classes/structs. The workers and threads share by default a std::shared_ptr<std::vector<Datum>>. It contains
* all the parameters that the different workers and threads need to exchange.
* Datum is one of the main OpenPose classes/structs. The workers and threads share by default a
* std::shared_ptr<std::vector<Datum>>. It contains all the parameters that the different workers and threads need
* to exchange.
*/
struct OP_API Datum
{
// -------------------------------------------------- ID parameters -------------------------------------------------- //
// ---------------------------------------- ID parameters ---------------------------------------- //
unsigned long long id; /**< Datum ID. Internally used to sort the Datums if multi-threading is used. */
std::string name; /**< Name used when saving the data to disk (e.g. `write_images` or `write_keypoint` flags in the demo). */
/**
* Name used when saving the data to disk (e.g. `write_images` or `write_keypoint` flags in the demo).
*/
std::string name;
// -------------------------------------------------- Input image and rendered version parameters -------------------------------------------------- //
// ------------------------------ Input image and rendered version parameters ------------------------------ //
/**
* Original image to be processed in cv::Mat uchar format.
* Size: (input_width x input_height) x 3 channels
......@@ -27,8 +31,10 @@ namespace op
/**
* Original image to be processed in Array<float> format.
* It has been resized to the net input resolution, as well as reformatted Array<float> format to be compatible with the net.
* In case of >1 scales, then each scale is right- and bottom-padded to fill the greatest resolution. The scales are sorted from bigger to smaller.
* It has been resized to the net input resolution, as well as reformatted Array<float> format to be compatible
* with the net.
* In case of >1 scales, then each scale is right- and bottom-padded to fill the greatest resolution. The
* scales are sorted from bigger to smaller.
* Size: #scales x 3 x input_net_height x input_net_width
*/
Array<float> inputNetData;
......@@ -49,7 +55,7 @@ namespace op
*/
cv::Mat cvOutputData;
// -------------------------------------------------- Resulting Array<float> data parameters -------------------------------------------------- //
// ------------------------------ Resulting Array<float> data parameters ------------------------------ //
/**
* Body pose (x,y,score) locations for each person in the image.
* It has been resized to the desired output resolution (e.g. `resolution` flag in the demo).
......@@ -60,10 +66,14 @@ namespace op
/**
* Body pose heatmaps (body parts, background and/or PAFs) for the whole image.
* This parameters is by default empty and disabled for performance. Each group (body parts, background and PAFs) can be individually enabled.
* #heatmaps = #body parts (if enabled) + 1 (if background enabled) + 2 x #PAFs (if enabled). Each PAF has 2 consecutive channels, one for x- and one for y-coordinates.
* Order heatmaps: body parts + background (as appears in POSE_BODY_PART_MAPPING) + (x,y) channel of each PAF (sorted as appears in POSE_BODY_PART_PAIRS). See `pose/poseParameters.hpp`.
* The user can choose the heatmaps normalization: ranges [0, 1], [-1, 1] or [0, 255]. Check the `heatmaps_scale` flag in the examples/tutorial_wrapper/ for more details.
         * This parameter is by default empty and disabled for performance. Each group (body parts, background and
* PAFs) can be individually enabled.
* #heatmaps = #body parts (if enabled) + 1 (if background enabled) + 2 x #PAFs (if enabled). Each PAF has 2
* consecutive channels, one for x- and one for y-coordinates.
* Order heatmaps: body parts + background (as appears in POSE_BODY_PART_MAPPING) + (x,y) channel of each PAF
* (sorted as appears in POSE_BODY_PART_PAIRS). See `pose/poseParameters.hpp`.
* The user can choose the heatmaps normalization: ranges [0, 1], [-1, 1] or [0, 255]. Check the
* `heatmaps_scale` flag in the examples/tutorial_wrapper/ for more details.
* Size: #heatmaps x output_net_height x output_net_width
*/
Array<float> poseHeatMaps;
......@@ -111,29 +121,55 @@ namespace op
*/
std::array<Array<float>, 2> handHeatMaps;
// -------------------------------------------------- Other parameters -------------------------------------------------- //
float scaleInputToOutput; /**< Scale ratio between the input Datum::cvInputData and the output Datum::cvOutputData. */
// ---------------------------------------- Other parameters ---------------------------------------- //
/**
* Scale ratio between the input Datum::cvInputData and the net input size.
*/
std::vector<double> scaleInputToNetInputs;
/**
* Size(s) (width x height) of the image(s) fed to the pose deep net.
* The size of the std::vector corresponds to the number of scales.
*/
std::vector<Point<int>> netInputSizes;
/**
* Scale ratio between the input Datum::cvInputData and the output Datum::cvOutputData.
*/
double scaleInputToOutput;
float scaleNetToOutput; /**< Scale ratio between the net output and the final output Datum::cvOutputData. */
/**
* Size (width x height) of the image returned by the deep net.
*/
Point<int> netOutputSize;
std::vector<float> scaleRatios; /**< Scale ratios between each scale (e.g. flag `scale_number`). Used to resize the different scales. */
/**
* Scale ratio between the net output and the final output Datum::cvOutputData.
*/
double scaleNetToOutput;
std::pair<int, std::string> elementRendered; /**< Pair with the element key id POSE_BODY_PART_MAPPING on `pose/poseParameters.hpp` and its mapped value (e.g. 1 and "Neck"). */
/**
* Pair with the element key id POSE_BODY_PART_MAPPING on `pose/poseParameters.hpp` and its mapped value (e.g.
* 1 and "Neck").
*/
std::pair<int, std::string> elementRendered;
// -------------------------------------------------- Functions -------------------------------------------------- //
// ---------------------------------------- Functions ---------------------------------------- //
/**
* Default constructor struct.
* It simply initializes the struct, id is temporary set to 0 and each other variable is assigned to its default value.
         * It simply initializes the struct, id is temporarily set to 0 and each other variable is assigned to its
* default value.
*/
explicit Datum();
/**
* Copy constructor.
* It performs `fast copy`: For performance purpose, copying a Datum or Array<T> or cv::Mat just copies the reference, it still shares the same internal data.
* It performs `fast copy`: For performance purpose, copying a Datum or Array<T> or cv::Mat just copies the
* reference, it still shares the same internal data.
* Modifying the copied element will modify the original one.
* Use clone() for a slower but real copy, similarly to cv::Mat and Array<T>.
* @param datum Datum to be copied.
......@@ -172,7 +208,8 @@ namespace op
/**
* Clone function.
* Similar to cv::Mat::clone and Array<T>::clone.
* It performs a real but slow copy of the data, i.e., even if the copied element is modified, the original one is not.
* It performs a real but slow copy of the data, i.e., even if the copied element is modified, the original
* one is not.
* @return The resulting Datum.
*/
Datum clone() const;
......@@ -181,7 +218,7 @@ namespace op
// -------------------------------------------------- Comparison operators -------------------------------------------------- //
// ---------------------------------------- Comparison operators ---------------------------------------- //
/**
* Less comparison operator.
* @param datum Datum to be compared.
......
......@@ -21,9 +21,11 @@
#include <openpose/core/renderer.hpp>
#include <openpose/core/resizeAndMergeBase.hpp>
#include <openpose/core/resizeAndMergeCaffe.hpp>
#include <openpose/core/scaleAndSizeExtractor.hpp>
#include <openpose/core/wCvMatToOpInput.hpp>
#include <openpose/core/wCvMatToOpOutput.hpp>
#include <openpose/core/wKeypointScaler.hpp>
#include <openpose/core/wOpOutputToCvMat.hpp>
#include <openpose/core/wScaleAndSizeExtractor.hpp>
#endif // OPENPOSE_CORE_HEADERS_HPP
......@@ -11,9 +11,9 @@ namespace op
public:
explicit KeypointScaler(const ScaleMode scaleMode);
void scale(Array<float>& arrayToScale, const float scaleInputToOutput, const float scaleNetToOutput, const Point<int>& producerSize) const;
void scale(Array<float>& arrayToScale, const double scaleInputToOutput, const double scaleNetToOutput, const Point<int>& producerSize) const;
void scale(std::vector<Array<float>>& arraysToScale, const float scaleInputToOutput, const float scaleNetToOutput, const Point<int>& producerSize) const;
void scale(std::vector<Array<float>>& arraysToScale, const double scaleInputToOutput, const double scaleNetToOutput, const Point<int>& producerSize) const;
private:
const ScaleMode mScaleMode;
......
......@@ -26,10 +26,9 @@
className(const className&) = delete; \
className& operator=(const className&) = delete
// Instantiate a class with all the basic types
#define COMPILE_TEMPLATE_BASIC_TYPES_CLASS(className) COMPILE_TEMPLATE_BASIC_TYPES(className, class)
#define COMPILE_TEMPLATE_BASIC_TYPES_STRUCT(className) COMPILE_TEMPLATE_BASIC_TYPES(className, struct)
#define COMPILE_TEMPLATE_BASIC_TYPES(className, classType) \
template classType OP_API className<char>; \
template classType OP_API className<signed char>; \
......@@ -46,6 +45,22 @@
template classType OP_API className<double>; \
template classType OP_API className<long double>
// Instantiate a class with float and double specifications
#define COMPILE_TEMPLATE_FLOATING_TYPES_CLASS(className) COMPILE_TEMPLATE_FLOATING_TYPES(className, class)
#define COMPILE_TEMPLATE_FLOATING_TYPES_STRUCT(className) COMPILE_TEMPLATE_FLOATING_TYPES(className, struct)
#define COMPILE_TEMPLATE_FLOATING_TYPES(className, classType) \
char gInstantiationGuard##className; \
template classType OP_API className<float>; \
template classType OP_API className<double>
// PIMPL does not work if function arguments need the 3rd-party class. Alternative:
// stackoverflow.com/questions/13978775/how-to-avoid-include-dependency-to-external-library?answertab=active#tab-top
struct dim3;
namespace caffe
{
template <typename T> class Blob;
}
// Includes at the end, since this macro file does not need them, but the files that include this
// one do. However, keeping the includes at the beginning might create a circular include problem.
#include <memory> // std::shared_ptr
......
......@@ -6,10 +6,12 @@
namespace op
{
template <typename T>
OP_API void maximumCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize);
OP_API void maximumCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize);
template <typename T>
OP_API void maximumGpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize);
OP_API void maximumGpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize);
}
#endif // OPENPOSE_CORE_MAXIMUM_BASE_HPP
#ifdef USE_CAFFE
#ifndef OPENPOSE_CORE_MAXIMUM_CAFFE_HPP
#define OPENPOSE_CORE_MAXIMUM_CAFFE_HPP
#include <caffe/blob.hpp>
#include <openpose/core/common.hpp>
namespace op
{
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep the compatibility with any generic Caffe version,
// we keep this 'layer' inside our library rather than in the Caffe code.
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep
// the compatibility with any generic Caffe version, we keep this 'layer' inside our library rather than in the
// Caffe code.
template <typename T>
class OP_API MaximumCaffe
{
......@@ -25,9 +24,11 @@ namespace op
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom);
private:
std::array<int, 4> mBottomSize;
......@@ -36,4 +37,3 @@ namespace op
}
#endif // OPENPOSE_CORE_MAXIMUM_CAFFE_HPP
#endif
#ifdef USE_CAFFE
#ifndef OPENPOSE_CORE_NET_CAFFE_HPP
#define OPENPOSE_CORE_NET_CAFFE_HPP
#include <caffe/net.hpp>
#include <openpose/core/common.hpp>
#include <openpose/core/net.hpp>
......@@ -11,7 +9,8 @@ namespace op
class OP_API NetCaffe : public Net
{
public:
NetCaffe(const std::array<int, 4>& netInputSize4D, const std::string& caffeProto, const std::string& caffeTrainedModel, const int gpuId = 0,
NetCaffe(const std::array<int, 4>& netInputSize4D, const std::string& caffeProto,
const std::string& caffeTrainedModel, const int gpuId = 0,
const std::string& lastBlobName = "net_output");
virtual ~NetCaffe();
......@@ -29,20 +28,15 @@ namespace op
boost::shared_ptr<caffe::Blob<float>> getOutputBlob() const;
private:
// Init with constructor
const int mGpuId;
const std::array<int, 4> mNetInputSize4D;
const unsigned long mNetInputMemory;
const std::string mCaffeProto;
const std::string mCaffeTrainedModel;
const std::string mLastBlobName;
// Init with thread
std::unique_ptr<caffe::Net<float>> upCaffeNet;
boost::shared_ptr<caffe::Blob<float>> spOutputBlob;
// PIMPL idiom
// http://www.cppsamples.com/common-tasks/pimpl.html
struct ImplNetCaffe;
std::unique_ptr<ImplNetCaffe> upImpl;
// PIMP requires DELETE_COPY & destructor, or extra code
// http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
DELETE_COPY(NetCaffe);
};
}
#endif // OPENPOSE_CORE_NET_CAFFE_HPP
#endif
#ifdef USE_CAFFE
#ifndef OPENPOSE_CORE_NMS_CAFFE_HPP
#define OPENPOSE_CORE_NMS_CAFFE_HPP
#include <caffe/blob.hpp>
#include <openpose/core/common.hpp>
namespace op
{
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep the compatibility with any generic Caffe version,
// we keep this 'layer' inside our library rather than in the Caffe code.
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep
// the compatibility with any generic Caffe version, we keep this 'layer' inside our library rather than in the
// Caffe code.
template <typename T>
class OP_API NmsCaffe
{
public:
explicit NmsCaffe();
virtual ~NmsCaffe();
virtual void LayerSetUp(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top, const int maxPeaks);
virtual void Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top,
const int maxPeaks);
virtual inline const char* type() const { return "Nms"; }
......@@ -27,17 +29,24 @@ namespace op
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom);
private:
T mThreshold;
caffe::Blob<int> mKernelBlob;
std::array<int, 4> mBottomSize;
std::array<int, 4> mTopSize;
// PIMPL idiom
// http://www.cppsamples.com/common-tasks/pimpl.html
struct ImplNmsCaffe;
std::unique_ptr<ImplNmsCaffe> upImpl;
// PIMP requires DELETE_COPY & destructor, or extra code
// http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
DELETE_COPY(NmsCaffe);
};
}
#endif // OPENPOSE_CORE_NMS_CAFFE_HPP
#endif
......@@ -7,11 +7,11 @@ namespace op
{
template <typename T>
OP_API void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const std::vector<T>& scaleRatios = {1});
const std::vector<T>& scaleInputToNetInputs = {1.f});
template <typename T>
OP_API void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize,
const std::vector<T>& scaleRatios = {1});
const std::vector<T>& scaleInputToNetInputs = {1.f});
}
#endif // OPENPOSE_CORE_RESIZE_AND_MERGE_BASE_HPP
#ifdef USE_CAFFE
#ifndef OPENPOSE_CORE_RESIZE_AND_MERGE_CAFFE_HPP
#define OPENPOSE_CORE_RESIZE_AND_MERGE_CAFFE_HPP
#include <caffe/blob.hpp>
#include <openpose/core/common.hpp>
// PIMPL does not work here. Alternative:
// stackoverflow.com/questions/13978775/how-to-avoid-include-dependency-to-external-library?answertab=active#tab-top
namespace caffe
{
template <typename T> class Blob;
}
namespace op
{
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep the
// compatibility with any generic Caffe version,
// we keep this 'layer' inside our library rather than in the Caffe code.
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep
// the compatibility with any generic Caffe version, we keep this 'layer' inside our library rather than in the
// Caffe code.
template <typename T>
class OP_API ResizeAndMergeCaffe
{
......@@ -45,4 +50,3 @@ namespace op
}
#endif // OPENPOSE_CORE_RESIZE_AND_MERGE_CAFFE_HPP
#endif
#ifndef OPENPOSE_CORE_SCALE_AND_SIZE_EXTRACTOR_HPP
#define OPENPOSE_CORE_SCALE_AND_SIZE_EXTRACTOR_HPP

#include <tuple>
#include <openpose/core/common.hpp>

namespace op
{
    /**
     * Computes, for a given input image resolution, the per-scale resize ratios and net input sizes, as well as
     * the input-to-output scale ratio and the net output size. The resulting tuple fields map one-to-one onto the
     * Datum members scaleInputToNetInputs, netInputSizes, scaleInputToOutput and netOutputSize.
     */
    class OP_API ScaleAndSizeExtractor
    {
    public:
        /**
         * Constructor of the ScaleAndSizeExtractor class.
         * @param netInputResolution Resolution (width x height) at which images are fed to the deep net.
         * @param outputResolution Desired resolution of the final output.
         * @param scaleNumber Number of scales to process per image (1 by default).
         * @param scaleGap Gap between consecutive scale ratios (0.25 by default). Only meaningful when
         * scaleNumber > 1. NOTE(review): exact interpretation lives in the .cpp implementation — confirm there.
         */
        ScaleAndSizeExtractor(const Point<int>& netInputResolution, const Point<int>& outputResolution,
                              const int scaleNumber = 1, const double scaleGap = 0.25);

        /**
         * Computes the scales and sizes for an input image resolution.
         * @param inputResolution Size (width x height) of the input image.
         * @return std::tuple with, in order: the scale ratios input-to-net-input (one per scale), the net input
         * sizes (one per scale), the scale ratio input-to-output, and the net output size.
         */
        std::tuple<std::vector<double>, std::vector<Point<int>>, double, Point<int>> extract(
            const Point<int>& inputResolution) const;

    private:
        const Point<int> mNetInputResolution;
        const Point<int> mOutputSize;
        const int mScaleNumber;
        const double mScaleGap;
    };
}

#endif // OPENPOSE_CORE_SCALE_AND_SIZE_EXTRACTOR_HPP
......@@ -29,7 +29,6 @@ namespace op
// Implementation
#include <openpose/utilities/openCv.hpp>
#include <openpose/utilities/pointerContainer.hpp>
namespace op
{
......@@ -57,10 +56,12 @@ namespace op
const auto profilerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__);
// cv::Mat -> float*
for (auto& tDatum : *tDatums)
std::tie(tDatum.inputNetData, tDatum.scaleRatios) = spCvMatToOpInput->format(tDatum.cvInputData);
tDatum.inputNetData = spCvMatToOpInput->createArray(tDatum.cvInputData,
tDatum.scaleInputToNetInputs,
tDatum.netInputSizes);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -59,10 +59,11 @@ namespace op
const auto profilerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__);
// cv::Mat -> float*
for (auto& tDatum : tDatumsNoPtr)
std::tie(tDatum.scaleInputToOutput, tDatum.outputData) = spCvMatToOpOutput->format(tDatum.cvInputData);
tDatum.outputData = spCvMatToOpOutput->createArray(tDatum.cvInputData, tDatum.scaleInputToOutput,
tDatum.netOutputSize);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -56,11 +56,11 @@ namespace op
for (auto& tDatum : *tDatums)
{
std::vector<Array<float>> arraysToScale{tDatum.poseKeypoints, tDatum.handKeypoints[0], tDatum.handKeypoints[1], tDatum.faceKeypoints};
spKeypointScaler->scale(arraysToScale, (float)tDatum.scaleInputToOutput, (float)tDatum.scaleNetToOutput, Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows});
spKeypointScaler->scale(arraysToScale, tDatum.scaleInputToOutput, tDatum.scaleNetToOutput, Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows});
}
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -59,7 +59,7 @@ namespace op
tDatum.cvOutputData = spOpOutputToCvMat->formatToCvMat(tDatum.outputData);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
#ifndef OPENPOSE_CORE_W_SCALE_AND_SIZE_EXTRACTOR_HPP
#define OPENPOSE_CORE_W_SCALE_AND_SIZE_EXTRACTOR_HPP

#include <openpose/core/common.hpp>
#include <openpose/core/scaleAndSizeExtractor.hpp>
#include <openpose/thread/worker.hpp>

namespace op
{
    /**
     * Worker wrapper around ScaleAndSizeExtractor: for every Datum in the queue, it fills in
     * scaleInputToNetInputs, netInputSizes, scaleInputToOutput and netOutputSize from the resolution of
     * Datum::cvInputData.
     */
    template<typename TDatums>
    class WScaleAndSizeExtractor : public Worker<TDatums>
    {
    public:
        explicit WScaleAndSizeExtractor(const std::shared_ptr<ScaleAndSizeExtractor>& scaleAndSizeExtractor);

        void initializationOnThread();

        void work(TDatums& tDatums);

    private:
        const std::shared_ptr<ScaleAndSizeExtractor> spScaleAndSizeExtractor;

        DELETE_COPY(WScaleAndSizeExtractor);
    };
}

// Implementation
#include <openpose/utilities/pointerContainer.hpp>
namespace op
{
    template<typename TDatums>
    WScaleAndSizeExtractor<TDatums>::WScaleAndSizeExtractor(
        const std::shared_ptr<ScaleAndSizeExtractor>& scaleAndSizeExtractor) :
        spScaleAndSizeExtractor{scaleAndSizeExtractor}
    {
    }

    template<typename TDatums>
    void WScaleAndSizeExtractor<TDatums>::initializationOnThread()
    {
        // No per-thread state is required by this worker
    }

    template<typename TDatums>
    void WScaleAndSizeExtractor<TDatums>::work(TDatums& tDatums)
    {
        try
        {
            if (checkNoNullNorEmpty(tDatums))
            {
                // Debugging log
                dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
                // Profiling speed
                const auto timerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__);
                // Image resolution -> scale ratios and net input/output sizes
                auto& datums = *tDatums;
                for (auto& datum : datums)
                {
                    const Point<int> imageResolution{datum.cvInputData.cols, datum.cvInputData.rows};
                    std::tie(datum.scaleInputToNetInputs, datum.netInputSizes, datum.scaleInputToOutput,
                             datum.netOutputSize) = spScaleAndSizeExtractor->extract(imageResolution);
                }
                // Profiling speed
                Profiler::timerEnd(timerKey);
                Profiler::printAveragedTimeMsOnIterationX(timerKey, __LINE__, __FUNCTION__, __FILE__);
                // Debugging log
                dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
            }
        }
        catch (const std::exception& e)
        {
            this->stop();
            tDatums = nullptr;
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    COMPILE_TEMPLATE_DATUM(WScaleAndSizeExtractor);
}

#endif // OPENPOSE_CORE_W_SCALE_AND_SIZE_EXTRACTOR_HPP
......@@ -11,7 +11,7 @@ namespace op
public:
explicit FaceDetector(const PoseModel poseModel);
std::vector<Rectangle<float>> detectFaces(const Array<float>& poseKeypoints, const float scaleInputToOutput) const;
std::vector<Rectangle<float>> detectFaces(const Array<float>& poseKeypoints, const double scaleInputToOutput) const;
private:
const unsigned int mNeck;
......
#ifndef OPENPOSE_FACE_FACE_EXTRACTOR_HPP
#define OPENPOSE_FACE_FACE_EXTRACTOR_HPP
#include <atomic>
#include <thread>
#include <opencv2/core/core.hpp> // cv::Mat
#include <openpose/core/common.hpp>
#include <openpose/core/maximumCaffe.hpp>
#include <openpose/core/net.hpp>
#include <openpose/core/resizeAndMergeCaffe.hpp>
#include <openpose/core/enumClasses.hpp>
namespace op
{
/**
* Face keypoint extractor class.
*/
class OP_API FaceExtractor
{
public:
/**
* Constructor of the FaceExtractor class.
* @param netInputSize Size at which the cropped image (where the face is located) is resized.
         * @param netOutputSize Size of the final results. At the moment, it must be equal to netInputSize.
*/
explicit FaceExtractor(const Point<int>& netInputSize, const Point<int>& netOutputSize,
const std::string& modelFolder, const int gpuId,
const std::vector<HeatMapType>& heatMapTypes = {},
const ScaleMode heatMapScale = ScaleMode::ZeroToOne);
void initializationOnThread();
/**
* Virtual destructor of the HandExtractor class.
* Required to allow inheritance.
*/
virtual ~FaceExtractor();
void forwardPass(const std::vector<Rectangle<float>>& faceRectangles, const cv::Mat& cvInputData,
const float scaleInputToOutput);
/**
* This function must be call before using any other function. It must also be called inside the thread in
* which the functions are going to be used.
*/
void initializationOnThread();
Array<float> getFaceKeypoints() const;
/**
* This function extracts the face keypoints for each detected face in the image.
* @param faceRectangles location of the faces in the image. It is a length-variable std::vector, where
* each index corresponds to a different person in the image. Internally, a op::Rectangle<float>
* (similar to cv::Rect for floating values) with the position of that face (or 0,0,0,0 if
* some face is missing, e.g. if a specific person has only half of the body inside the image).
* @param cvInputData Original image in cv::Mat format and BGR format.
* @param scaleInputToOutput Desired scale of the final keypoints. Set to 1 if the desired size is the
* cvInputData size.
*/
virtual void forwardPass(const std::vector<Rectangle<float>>& faceRectangles, const cv::Mat& cvInputData,
const double scaleInputToOutput) = 0;
Array<float> getHeatMaps() const;
private:
/**
* This function returns the face keypoins. VERY IMPORTANT: use getFaceKeypoints().clone() if the keypoints are
* going to be edited in a different thread.
* @return A Array with all the face keypoints. It follows the pose structure, i.e. the first dimension
* corresponds to all the people in the image, the second to each specific keypoint, and the third one to
* (x, y, score).
*/
Array<float> getFaceKeypoints() const;
protected:
const Point<int> mNetOutputSize;
std::shared_ptr<Net> spNet;
std::shared_ptr<ResizeAndMergeCaffe<float>> spResizeAndMergeCaffe;
std::shared_ptr<MaximumCaffe<float>> spMaximumCaffe;
Array<float> mFaceImageCrop;
Array<float> mFaceKeypoints;
// HeatMaps parameters
Array<float> mHeatMaps;
const ScaleMode mHeatMapScaleMode;
const std::vector<HeatMapType> mHeatMapTypes;
Array<float> mHeatMaps;
virtual void netInitializationOnThread() = 0;
private:
// Init with thread
boost::shared_ptr<caffe::Blob<float>> spCaffeNetOutputBlob;
std::shared_ptr<caffe::Blob<float>> spHeatMapsBlob;
std::shared_ptr<caffe::Blob<float>> spPeaksBlob;
std::thread::id mThreadId;
void checkThread() const;
......
#ifndef OPENPOSE_FACE_FACE_EXTRACTOR_CAFFE_HPP
#define OPENPOSE_FACE_FACE_EXTRACTOR_CAFFE_HPP

#include <opencv2/core/core.hpp> // cv::Mat
#include <openpose/core/common.hpp>
#include <openpose/core/enumClasses.hpp>
#include <openpose/face/faceExtractor.hpp>

namespace op
{
    /**
     * Face keypoint extractor class for Caffe framework.
     */
    class OP_API FaceExtractorCaffe : public FaceExtractor
    {
    public:
        /**
         * Constructor of the FaceExtractorCaffe class.
         * @param netInputSize Size at which the cropped image (where the face is located) is resized.
         * @param netOutputSize Size of the final results. At the moment, it is expected to be equal to
         * netInputSize — confirm against the .cpp implementation.
         * @param modelFolder Folder where the face Caffe model files are located.
         * @param gpuId GPU device id on which the net runs.
         * @param heatMapTypes Heatmap groups to record (empty by default, i.e. disabled).
         * @param heatMapScale Normalization applied to the recorded heatmaps (ScaleMode::ZeroToOne by default).
         */
        explicit FaceExtractorCaffe(const Point<int>& netInputSize, const Point<int>& netOutputSize,
                                    const std::string& modelFolder, const int gpuId,
                                    const std::vector<HeatMapType>& heatMapTypes = {},
                                    const ScaleMode heatMapScale = ScaleMode::ZeroToOne);

        virtual ~FaceExtractorCaffe();

        /**
         * This function must be called before using any other function. It must also be called inside the thread
         * in which the functions are going to be used.
         */
        void netInitializationOnThread();

        /**
         * This function extracts the face keypoints for each detected face in the image.
         * @param faceRectangles location of the faces in the image. It is a length-variable std::vector, where
         * each index corresponds to a different person in the image. Internally, a op::Rectangle<float>
         * (similar to cv::Rect for floating values) with the position of that face (or 0,0,0,0 if
         * some face is missing, e.g. if a specific person has only half of the body inside the image).
         * @param cvInputData Original image in cv::Mat format and BGR format.
         * @param scaleInputToOutput Desired scale of the final keypoints. Set to 1 if the desired size is the
         * cvInputData size.
         */
        void forwardPass(const std::vector<Rectangle<float>>& faceRectangles, const cv::Mat& cvInputData,
                         const double scaleInputToOutput);

    private:
        // PIMPL idiom
        // http://www.cppsamples.com/common-tasks/pimpl.html
        struct ImplFaceExtractorCaffe;
        std::unique_ptr<ImplFaceExtractorCaffe> upImpl;

        // PIMPL requires DELETE_COPY & destructor, or extra code
        // http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
        DELETE_COPY(FaceExtractorCaffe);
    };
}

#endif // OPENPOSE_FACE_FACE_EXTRACTOR_CAFFE_HPP
......@@ -5,6 +5,7 @@
#include <openpose/face/faceDetector.hpp>
#include <openpose/face/faceDetectorOpenCV.hpp>
#include <openpose/face/faceExtractor.hpp>
#include <openpose/face/faceExtractorCaffe.hpp>
#include <openpose/face/faceParameters.hpp>
#include <openpose/face/faceCpuRenderer.hpp>
#include <openpose/face/faceGpuRenderer.hpp>
......
......@@ -59,7 +59,7 @@ namespace op
tDatum.faceRectangles = spFaceDetector->detectFaces(tDatum.poseKeypoints, tDatum.scaleInputToOutput);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -59,7 +59,7 @@ namespace op
tDatum.faceRectangles = spFaceDetectorOpenCV->detectFaces(tDatum.cvInputData);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -64,7 +64,7 @@ namespace op
}
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -60,7 +60,7 @@ namespace op
spFaceRenderer->renderFace(tDatum.outputData, tDatum.faceKeypoints);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -63,7 +63,7 @@ namespace op
spCocoJsonSaver->record(tDatum.poseKeypoints, tDatum.name);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -65,7 +65,7 @@ namespace op
spKeypointSaver->saveKeypoints(keypointVector, fileName, "face");
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -70,7 +70,7 @@ namespace op
spKeypointSaver->saveKeypoints(keypointVector, fileName, "hand_right");
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -64,7 +64,7 @@ namespace op
spImageSaver->saveImages(cvOutputDatas, fileName);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -76,7 +76,7 @@ namespace op
}
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -65,7 +65,7 @@ namespace op
spKeypointSaver->saveKeypoints(keypointVector, fileName, "pose");
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -63,7 +63,7 @@ namespace op
spVideoSaver->write(cvOutputDatas);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -12,7 +12,8 @@ namespace op
public:
GuiInfoAdder(const int numberGpus, const bool guiEnabled = false);
void addInfo(cv::Mat& cvOutputData, const Array<float>& poseKeypoints, const unsigned long long id, const std::string& elementRenderedName);
void addInfo(cv::Mat& cvOutputData, const int numberPeople, const unsigned long long id,
const std::string& elementRenderedName);
private:
// Const variables
......
......@@ -75,7 +75,7 @@ namespace op
if (!tDatumsNoPtr.empty())
{
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
}
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
......
......@@ -56,10 +56,12 @@ namespace op
const auto profilerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__);
// Add GUI components to frame
for (auto& tDatum : *tDatums)
spGuiInfoAdder->addInfo(tDatum.cvOutputData, tDatum.poseKeypoints, tDatum.id, tDatum.elementRendered.second);
spGuiInfoAdder->addInfo(tDatum.cvOutputData, std::max(tDatum.poseKeypoints.getSize(0),
tDatum.faceKeypoints.getSize(0)),
tDatum.id, tDatum.elementRendered.second);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -14,9 +14,9 @@ namespace op
public:
explicit HandDetector(const PoseModel poseModel);
std::vector<std::array<Rectangle<float>, 2>> detectHands(const Array<float>& poseKeypoints, const float scaleInputToOutput) const;
std::vector<std::array<Rectangle<float>, 2>> detectHands(const Array<float>& poseKeypoints, const double scaleInputToOutput) const;
std::vector<std::array<Rectangle<float>, 2>> trackHands(const Array<float>& poseKeypoints, const float scaleInputToOutput);
std::vector<std::array<Rectangle<float>, 2>> trackHands(const Array<float>& poseKeypoints, const double scaleInputToOutput);
void updateTracker(const std::array<Array<float>, 2>& handKeypoints, const unsigned long long id);
......
#ifndef OPENPOSE_HAND_HAND_EXTRACTOR_HPP
#define OPENPOSE_HAND_HAND_EXTRACTOR_HPP
#include <atomic>
#include <thread>
#include <opencv2/core/core.hpp> // cv::Mat
#include <openpose/core/enumClasses.hpp>
#include <openpose/core/common.hpp>
#include <openpose/core/maximumCaffe.hpp>
#include <openpose/core/net.hpp>
#include <openpose/core/resizeAndMergeCaffe.hpp>
#include <openpose/core/enumClasses.hpp>
namespace op
{
......@@ -22,18 +18,21 @@ namespace op
* Constructor of the HandExtractor class.
* @param netInputSize Size at which the cropped image (where the hand is located) is resized.
* @param netOutputSize Size of the final results. At the moment, it must be equal than netOutputSize.
* @param modelFolder Folder where the models are located.
* @param gpuId The GPU index (0-based) which the deep net will use.
* @param numberScales Number of scales to run. The more scales, the slower it will be but possibly also more
* accurate.
* @param rangeScales The range between the smaller and bigger scale.
*/
explicit HandExtractor(const Point<int>& netInputSize, const Point<int>& netOutputSize,
const std::string& modelFolder, const int gpuId,
const unsigned short numberScales = 1, const float rangeScales = 0.4f,
const std::vector<HeatMapType>& heatMapTypes = {},
const ScaleMode heatMapScale = ScaleMode::ZeroToOne);
/**
* Virtual destructor of the HandExtractor class.
* Required to allow inheritance.
*/
virtual ~HandExtractor();
/**
* This function must be called before using any other function. It must also be called inside the thread in
* which the functions are going to be used.
......@@ -42,7 +41,7 @@ namespace op
/**
* This function extracts the hand keypoints for each detected hand in the image.
* @param fpsMode handRectangles Location of the hands in the image. It is a length-variable std::vector, where
* @param handRectangles location of the hands in the image. It is a length-variable std::vector, where
* each index corresponds to a different person in the image. Internally the std::vector, a std::array of 2
* elements: index 0 and 1 for left and right hand respectively. Inside each array element, a
* op::Rectangle<float> (similar to cv::Rect for floating values) with the position of that hand (or 0,0,0,0 if
......@@ -51,45 +50,39 @@ namespace op
* @param scaleInputToOutput Desired scale of the final keypoints. Set to 1 if the desired size is the
* cvInputData size.
*/
void forwardPass(const std::vector<std::array<Rectangle<float>, 2>> handRectangles, const cv::Mat& cvInputData,
const float scaleInputToOutput);
virtual void forwardPass(const std::vector<std::array<Rectangle<float>, 2>> handRectangles,
const cv::Mat& cvInputData,
const double scaleInputToOutput) = 0;
std::array<Array<float>, 2> getHeatMaps() const;
/**
* This function returns the hand keypoints. VERY IMPORTANT: use getHandKeypoints().clone() if the keypoints are
* going to be edited in a different thread.
* @return And std::array with all the left hand keypoints (index 0) and all the right ones (index 1). Each
* @return A std::array with all the left hand keypoints (index 0) and all the right ones (index 1). Each
* Array<float> follows the pose structure, i.e. the first dimension corresponds to all the people in the
* image, the second to each specific keypoint, and the third one to (x, y, score).
*/
std::array<Array<float>, 2> getHandKeypoints() const;
std::array<Array<float>, 2> getHeatMaps() const;
private:
protected:
const std::pair<unsigned short, float> mMultiScaleNumberAndRange;
const Point<int> mNetOutputSize;
std::shared_ptr<Net> spNet;
std::shared_ptr<ResizeAndMergeCaffe<float>> spResizeAndMergeCaffe;
std::shared_ptr<MaximumCaffe<float>> spMaximumCaffe;
Array<float> mHandImageCrop;
std::array<Array<float>, 2> mHandKeypoints;
// HeatMaps parameters
const ScaleMode mHeatMapScaleMode;
const std::vector<HeatMapType> mHeatMapTypes;
std::array<Array<float>, 2> mHeatMaps;
virtual void netInitializationOnThread() = 0;
private:
// Init with thread
boost::shared_ptr<caffe::Blob<float>> spCaffeNetOutputBlob;
std::shared_ptr<caffe::Blob<float>> spHeatMapsBlob;
std::shared_ptr<caffe::Blob<float>> spPeaksBlob;
std::thread::id mThreadId;
void checkThread() const;
void detectHandKeypoints(Array<float>& handCurrent, const float scaleInputToOutput, const int person,
const cv::Mat& affineMatrix);
Array<float> getHeatMapsFromLastPass() const;
DELETE_COPY(HandExtractor);
};
}
......
#ifndef OPENPOSE_HAND_HAND_EXTRACTOR_CAFFE_HPP
#define OPENPOSE_HAND_HAND_EXTRACTOR_CAFFE_HPP
#include <opencv2/core/core.hpp> // cv::Mat
#include <openpose/core/common.hpp>
#include <openpose/core/enumClasses.hpp>
#include <openpose/hand/handExtractor.hpp>
namespace op
{
    /**
     * Hand keypoint extractor class for Caffe framework.
     * Concrete implementation of the abstract HandExtractor that runs the hand keypoint
     * deep net through Caffe. Caffe-specific state is hidden behind a PIMPL so that this
     * header does not leak Caffe headers to its users.
     */
    class OP_API HandExtractorCaffe : public HandExtractor
    {
    public:
        /**
         * Constructor of the HandExtractorCaffe class.
         * @param netInputSize Size at which the cropped image (where the hand is located) is resized.
         * @param netOutputSize Size of the final results. At the moment, it must be equal to netInputSize
         * (the original comment said "equal than netOutputSize", which is self-referential — presumably
         * netInputSize was meant; TODO confirm against the implementation).
         * @param modelFolder Folder where the models are located.
         * @param gpuId The GPU index (0-based) which the deep net will use.
         * @param numberScales Number of scales to run. The more scales, the slower it will be but possibly also more
         * accurate.
         * @param rangeScales The range between the smaller and bigger scale.
         * @param heatMapTypes Which heat maps to record (empty vector disables heat map recording).
         * @param heatMapScale Scale mode in which the recorded heat map values are expressed
         * (default ScaleMode::ZeroToOne).
         */
        explicit HandExtractorCaffe(const Point<int>& netInputSize, const Point<int>& netOutputSize,
                                    const std::string& modelFolder, const int gpuId,
                                    const unsigned short numberScales = 1, const float rangeScales = 0.4f,
                                    const std::vector<HeatMapType>& heatMapTypes = {},
                                    const ScaleMode heatMapScale = ScaleMode::ZeroToOne);
        /**
         * Virtual destructor of the HandExtractorCaffe class.
         * Required to allow inheritance, and required by the PIMPL idiom (the destructor must be
         * defined where ImplHandExtractorCaffe is complete).
         */
        virtual ~HandExtractorCaffe();
        /**
         * This function must be called before using any other function. It must also be called inside the thread in
         * which the functions are going to be used.
         * Implements the pure virtual HandExtractor::netInitializationOnThread.
         */
        void netInitializationOnThread();
        /**
         * This function extracts the hand keypoints for each detected hand in the image.
         * Implements the pure virtual HandExtractor::forwardPass.
         * @param handRectangles location of the hands in the image. It is a length-variable std::vector, where
         * each index corresponds to a different person in the image. Internally the std::vector, a std::array of 2
         * elements: index 0 and 1 for left and right hand respectively. Inside each array element, a
         * op::Rectangle<float> (similar to cv::Rect for floating values) with the position of that hand (or 0,0,0,0 if
         * some hand is missing, e.g. if a specific person has only half of the body inside the image).
         * @param cvInputData Original image in cv::Mat format and BGR format.
         * @param scaleInputToOutput Desired scale of the final keypoints. Set to 1 if the desired size is the
         * cvInputData size.
         */
        // NOTE(review): handRectangles is taken by (const) value, which copies the vector on every call;
        // const& would avoid the copy, but the signature must stay in sync with the base-class pure
        // virtual HandExtractor::forwardPass — change both together if desired.
        void forwardPass(const std::vector<std::array<Rectangle<float>, 2>> handRectangles, const cv::Mat& cvInputData,
                         const double scaleInputToOutput);
    private:
        // PIMPL idiom
        // http://www.cppsamples.com/common-tasks/pimpl.html
        struct ImplHandExtractorCaffe;
        std::unique_ptr<ImplHandExtractorCaffe> upImpl;
        // Presumably runs the net on the cropped hand of `person` (warped via affineMatrix) and writes the
        // keypoints, scaled by scaleInputToOutput, into handCurrent — body not visible here, confirm in the .cpp.
        void detectHandKeypoints(Array<float>& handCurrent, const double scaleInputToOutput, const int person,
                                 const cv::Mat& affineMatrix);
        // Presumably gathers the heat maps produced by the most recent forwardPass — body not visible here.
        Array<float> getHeatMapsFromLastPass() const;
        // PIMPL requires DELETE_COPY & destructor, or extra code
        // http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
        DELETE_COPY(HandExtractorCaffe);
    };
}
#endif // OPENPOSE_HAND_HAND_EXTRACTOR_CAFFE_HPP
......@@ -5,6 +5,7 @@
#include <openpose/hand/handDetector.hpp>
#include <openpose/hand/handDetectorFromTxt.hpp>
#include <openpose/hand/handExtractor.hpp>
#include <openpose/hand/handExtractorCaffe.hpp>
#include <openpose/hand/handParameters.hpp>
#include <openpose/hand/handCpuRenderer.hpp>
#include <openpose/hand/handGpuRenderer.hpp>
......
......@@ -59,7 +59,7 @@ namespace op
tDatum.handRectangles = spHandDetector->detectHands(tDatum.poseKeypoints, tDatum.scaleInputToOutput);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -59,7 +59,7 @@ namespace op
tDatum.handRectangles = spHandDetectorFromTxt->detectHands();
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -59,7 +59,7 @@ namespace op
tDatum.handRectangles = spHandDetector->trackHands(tDatum.poseKeypoints, tDatum.scaleInputToOutput);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -59,7 +59,7 @@ namespace op
spHandDetector->updateTracker(tDatum.handKeypoints, tDatum.id);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -67,7 +67,7 @@ namespace op
}
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -60,7 +60,7 @@ namespace op
spHandRenderer->renderHand(tDatum.outputData, tDatum.handKeypoints);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -7,12 +7,16 @@
namespace op
{
template <typename T>
OP_API void connectBodyPartsCpu(Array<T>& poseKeypoints, const T* const heatMapPtr, const T* const peaksPtr, const PoseModel poseModel, const Point<int>& heatMapSize, const int maxPeaks,
const int interMinAboveThreshold, const T interThreshold, const int minSubsetCnt, const T minSubsetScore, const T scaleFactor = 1.f);
OP_API void connectBodyPartsCpu(Array<T>& poseKeypoints, const T* const heatMapPtr, const T* const peaksPtr,
const PoseModel poseModel, const Point<int>& heatMapSize, const int maxPeaks,
const int interMinAboveThreshold, const T interThreshold, const int minSubsetCnt,
const T minSubsetScore, const T scaleFactor = 1.f);
template <typename T>
OP_API void connectBodyPartsGpu(Array<T>& poseKeypoints, T* posePtr, const T* const heatMapPtr, const T* const peaksPtr, const PoseModel poseModel, const Point<int>& heatMapSize,
const int maxPeaks, const int interMinAboveThreshold, const T interThreshold, const int minSubsetCnt, const T minSubsetScore, const T scaleFactor = 1.f);
OP_API void connectBodyPartsGpu(Array<T>& poseKeypoints, T* posePtr, const T* const heatMapPtr,
const T* const peaksPtr, const PoseModel poseModel, const Point<int>& heatMapSize,
const int maxPeaks, const int interMinAboveThreshold, const T interThreshold,
const int minSubsetCnt, const T minSubsetScore, const T scaleFactor = 1.f);
}
#endif // OPENPOSE_POSE_BODY_PARTS_CONNECTOR_HPP
#ifdef USE_CAFFE
#ifndef OPENPOSE_POSE_BODY_PART_CONNECTOR_CAFFE_HPP
#define OPENPOSE_POSE_BODY_PART_CONNECTOR_CAFFE_HPP
#include <caffe/blob.hpp>
#include <openpose/core/common.hpp>
#include <openpose/pose/enumClasses.hpp>
// PIMPL does not work here. Alternative:
// stackoverflow.com/questions/13978775/how-to-avoid-include-dependency-to-external-library?answertab=active#tab-top
namespace caffe
{
template <typename T> class Blob;
}
namespace op
{
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep the compatibility with any generic Caffe version,
// we keep this 'layer' inside our library rather than in the Caffe code.
// It mostly follows the Caffe::layer implementation, so Caffe users can easily use it. However, in order to keep
// the compatibility with any generic Caffe version, we keep this 'layer' inside our library rather than in the
// Caffe code.
template <typename T>
class OP_API BodyPartConnectorCaffe
{
......@@ -36,11 +42,14 @@ namespace op
virtual void Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, Array<T>& poseKeypoints);
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top, Array<T>& poseKeypoints);
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top,
Array<T>& poseKeypoints);
virtual void Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom);
virtual void Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom);
private:
PoseModel mPoseModel;
......@@ -58,4 +67,3 @@ namespace op
}
#endif // OPENPOSE_POSE_BODY_PART_CONNECTOR_CAFFE_HPP
#endif
......@@ -12,19 +12,23 @@ namespace op
class OP_API PoseExtractor
{
public:
PoseExtractor(const Point<int>& netOutputSize, const Point<int>& outputSize, const PoseModel poseModel, const std::vector<HeatMapType>& heatMapTypes = {},
PoseExtractor(const Point<int>& netOutputSize, const Point<int>& outputSize, const PoseModel poseModel,
const std::vector<HeatMapType>& heatMapTypes = {},
const ScaleMode heatMapScale = ScaleMode::ZeroToOne);
virtual ~PoseExtractor();
void initializationOnThread();
virtual void forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize, const std::vector<float>& scaleRatios = {1.f}) = 0;
virtual void forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize,
const std::vector<double>& scaleRatios = {1.f}) = 0;
virtual const float* getHeatMapCpuConstPtr() const = 0;
virtual const float* getHeatMapGpuConstPtr() const = 0;
virtual std::vector<int> getHeatMapSize() const = 0;
Array<float> getHeatMaps() const;
virtual const float* getPoseGpuConstPtr() const = 0;
......
#ifdef USE_CAFFE
#ifndef OPENPOSE_POSE_POSE_EXTRACTOR_CAFFE_HPP
#define OPENPOSE_POSE_POSE_EXTRACTOR_CAFFE_HPP
#include <caffe/blob.hpp>
#include <openpose/core/common.hpp>
#include <openpose/core/net.hpp>
#include <openpose/core/nmsCaffe.hpp>
#include <openpose/core/resizeAndMergeCaffe.hpp>
#include <openpose/pose/bodyPartConnectorCaffe.hpp>
#include <openpose/pose/enumClasses.hpp>
#include <openpose/pose/poseExtractor.hpp>
......@@ -16,37 +10,37 @@ namespace op
class OP_API PoseExtractorCaffe : public PoseExtractor
{
public:
PoseExtractorCaffe(const Point<int>& netInputSize, const Point<int>& netOutputSize, const Point<int>& outputSize, const int scaleNumber,
const PoseModel poseModel, const std::string& modelFolder, const int gpuId, const std::vector<HeatMapType>& heatMapTypes = {},
PoseExtractorCaffe(const Point<int>& netInputSize, const Point<int>& netOutputSize,
const Point<int>& outputSize, const int scaleNumber, const PoseModel poseModel,
const std::string& modelFolder, const int gpuId,
const std::vector<HeatMapType>& heatMapTypes = {},
const ScaleMode heatMapScale = ScaleMode::ZeroToOne);
virtual ~PoseExtractorCaffe();
void netInitializationOnThread();
void forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize, const std::vector<float>& scaleRatios = {1.f});
void forwardPass(const Array<float>& inputNetData, const Point<int>& inputDataSize,
const std::vector<double>& scaleRatios = {1.f});
const float* getHeatMapCpuConstPtr() const;
const float* getHeatMapGpuConstPtr() const;
std::vector<int> getHeatMapSize() const;
const float* getPoseGpuConstPtr() const;
private:
const float mResizeScale;
std::shared_ptr<Net> spNet;
std::shared_ptr<ResizeAndMergeCaffe<float>> spResizeAndMergeCaffe;
std::shared_ptr<NmsCaffe<float>> spNmsCaffe;
std::shared_ptr<BodyPartConnectorCaffe<float>> spBodyPartConnectorCaffe;
// Init with thread
boost::shared_ptr<caffe::Blob<float>> spCaffeNetOutputBlob;
std::shared_ptr<caffe::Blob<float>> spHeatMapsBlob;
std::shared_ptr<caffe::Blob<float>> spPeaksBlob;
std::shared_ptr<caffe::Blob<float>> spPoseBlob;
// PIMPL idiom
// http://www.cppsamples.com/common-tasks/pimpl.html
struct ImplPoseExtractorCaffe;
std::unique_ptr<ImplPoseExtractorCaffe> upImpl;
// PIMP requires DELETE_COPY & destructor, or extra code
// http://oliora.github.io/2015/12/29/pimpl-and-rule-of-zero.html
DELETE_COPY(PoseExtractorCaffe);
};
}
#endif // OPENPOSE_POSE_POSE_EXTRACTOR_CAFFE_HPP
#endif
......@@ -13,9 +13,9 @@ namespace op
class OP_API PoseGpuRenderer : public GpuRenderer, public PoseRenderer
{
public:
PoseGpuRenderer(const Point<int>& heatMapsSize, const PoseModel poseModel,
const std::shared_ptr<PoseExtractor>& poseExtractor, const float renderThreshold,
const bool blendOriginalFrame = true, const float alphaKeypoint = POSE_DEFAULT_ALPHA_KEYPOINT,
PoseGpuRenderer(const PoseModel poseModel, const std::shared_ptr<PoseExtractor>& poseExtractor,
const float renderThreshold, const bool blendOriginalFrame = true,
const float alphaKeypoint = POSE_DEFAULT_ALPHA_KEYPOINT,
const float alphaHeatMap = POSE_DEFAULT_ALPHA_HEAT_MAP,
const unsigned int elementToRender = 0u);
......@@ -27,7 +27,6 @@ namespace op
const float scaleNetToOutput = -1.f);
private:
const Point<int> mHeatMapsSize;
const std::shared_ptr<PoseExtractor> spPoseExtractor;
// Init with thread
float* pGpuPose; // GPU aux memory
......
......@@ -58,14 +58,16 @@ namespace op
// Extract people pose
for (auto& tDatum : *tDatums)
{
spPoseExtractor->forwardPass(tDatum.inputNetData, Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows}, tDatum.scaleRatios);
spPoseExtractor->forwardPass(tDatum.inputNetData,
Point<int>{tDatum.cvInputData.cols, tDatum.cvInputData.rows},
tDatum.scaleInputToNetInputs);
tDatum.poseHeatMaps = spPoseExtractor->getHeatMaps().clone();
tDatum.poseKeypoints = spPoseExtractor->getPoseKeypoints().clone();
tDatum.scaleNetToOutput = spPoseExtractor->getScaleNetToOutput();
}
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -60,7 +60,7 @@ namespace op
tDatum.elementRendered = spPoseRenderer->renderPose(tDatum.outputData, tDatum.poseKeypoints, (float)tDatum.scaleNetToOutput);
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -60,7 +60,7 @@ namespace op
this->stop();
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Return TDatums
......
......@@ -106,7 +106,7 @@ namespace op
{
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -2,8 +2,6 @@
#define OPENPOSE_UTILITIES_CUDA_HPP
#include <utility> // std::pair
#include <cuda.h>
#include <cuda_runtime.h>
#include <openpose/core/common.hpp>
namespace op
......@@ -14,14 +12,14 @@ namespace op
OP_API int getGpuNumber();
inline unsigned int getNumberCudaBlocks(const unsigned int totalRequired, const unsigned int numberCudaThreads = CUDA_NUM_THREADS)
inline unsigned int getNumberCudaBlocks(const unsigned int totalRequired,
const unsigned int numberCudaThreads = CUDA_NUM_THREADS)
{
return (totalRequired + numberCudaThreads - 1) / numberCudaThreads;
}
OP_API dim3 getNumberCudaBlocks(const Point<int>& frameSize, const dim3 numberCudaThreads = dim3{ CUDA_NUM_THREADS, CUDA_NUM_THREADS, 1 });
OP_API std::pair<dim3, dim3> getNumberCudaThreadsAndBlocks(const Point<int>& frameSize);
OP_API void getNumberCudaThreadsAndBlocks(dim3& numberCudaThreads, dim3& numberCudaBlocks,
const Point<int>& frameSize);
}
#endif // OPENPOSE_UTILITIES_CUDA_HPP
......@@ -14,7 +14,7 @@
// const auto profilerKey = Profiler::timerInit(__LINE__, __FUNCTION__, __FILE__);
// // functions to do...
// Profiler::timerEnd(profilerKey);
// Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
// Profiler::printAveragedTimeMsOnIterationX(profilerKey, __LINE__, __FUNCTION__, __FILE__, NUMBER_ITERATIONS);
namespace op
{
......
......@@ -214,6 +214,7 @@ namespace op
std::vector<TWorker> mUserInputWs;
TWorker wDatumProducer;
TWorker spWIdGenerator;
TWorker spWScaleAndSizeExtractor;
TWorker spWCvMatToOpInput;
TWorker spWCvMatToOpOutput;
std::vector<std::vector<TWorker>> spWPoses;
......@@ -581,8 +582,8 @@ namespace op
error("Net input size cannot be -1x-1.", __LINE__, __FUNCTION__, __FILE__);
else if (poseNetInputSize.x == -1 || poseNetInputSize.y == -1)
{
if (producerSize.area() <= 0)
error("Net resolution cannot be -1 for image_dir, only for video and webcam.",
if (producerSize.x <= 0 || producerSize.y <= 0)
error("Net resolution cannot be -1 for image_dir, only for video, webcam, and IP camera.",
__LINE__, __FUNCTION__, __FILE__);
else if (poseNetInputSize.x == -1)
poseNetInputSize.x = 16 * intRound(
......@@ -593,6 +594,10 @@ namespace op
poseNetInputSize.x * producerSize.y / (float) producerSize.x / 16.f
);
}
// Security checks
if ((poseNetInputSize.x > 0 && poseNetInputSize.x % 16 != 0)
|| (poseNetInputSize.y > 0 && poseNetInputSize.y % 16 != 0))
error("Net input resolution must be multiples of 16.", __LINE__, __FUNCTION__, __FILE__);
// Producer
if (wrapperStructInput.producerSharedPtr != nullptr)
......@@ -606,12 +611,29 @@ namespace op
else
wDatumProducer = nullptr;
// Get input scales and sizes
const auto scaleAndSizeExtractor = std::make_shared<ScaleAndSizeExtractor>(
poseNetInputSize, finalOutputSize, wrapperStructPose.scalesNumber, wrapperStructPose.scaleGap
);
spWScaleAndSizeExtractor = std::make_shared<WScaleAndSizeExtractor<TDatumsPtr>>(scaleAndSizeExtractor);
// Input cvMat to OpenPose input & output format
const auto cvMatToOpInput = std::make_shared<CvMatToOpInput>();
spWCvMatToOpInput = std::make_shared<WCvMatToOpInput<TDatumsPtr>>(cvMatToOpInput);
if (renderOutput)
{
const auto cvMatToOpOutput = std::make_shared<CvMatToOpOutput>();
spWCvMatToOpOutput = std::make_shared<WCvMatToOpOutput<TDatumsPtr>>(cvMatToOpOutput);
}
// Pose estimators & renderers
const Point<int>& poseNetOutputSize = poseNetInputSize;
std::vector<std::shared_ptr<PoseExtractor>> poseExtractors;
std::vector<std::shared_ptr<PoseGpuRenderer>> poseGpuRenderers;
std::shared_ptr<PoseCpuRenderer> poseCpuRenderer;
std::vector<TWorker> cpuRenderers;
spWPoses.clear();
spWPoses.resize(gpuNumber);
if (wrapperStructPose.enable)
{
// Pose estimators
......@@ -634,11 +656,11 @@ namespace op
// GPU rendering
if (renderOutputGpu)
{
for (auto gpuId = 0u; gpuId < poseExtractors.size(); gpuId++)
for (const auto& poseExtractor : poseExtractors)
{
poseGpuRenderers.emplace_back(std::make_shared<PoseGpuRenderer>(
poseNetOutputSize, wrapperStructPose.poseModel, poseExtractors[gpuId],
wrapperStructPose.renderThreshold, wrapperStructPose.blendOriginalFrame, alphaKeypoint,
wrapperStructPose.poseModel, poseExtractor, wrapperStructPose.renderThreshold,
wrapperStructPose.blendOriginalFrame, alphaKeypoint,
alphaHeatMap, wrapperStructPose.defaultPartToRender
));
}
......@@ -651,26 +673,13 @@ namespace op
cpuRenderers.emplace_back(std::make_shared<WPoseRenderer<TDatumsPtr>>(poseCpuRenderer));
}
}
}
log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Input cvMat to OpenPose format
const auto cvMatToOpInput = std::make_shared<CvMatToOpInput>(
poseNetInputSize, wrapperStructPose.scalesNumber, wrapperStructPose.scaleGap
);
spWCvMatToOpInput = std::make_shared<WCvMatToOpInput<TDatumsPtr>>(cvMatToOpInput);
const auto cvMatToOpOutput = std::make_shared<CvMatToOpOutput>(finalOutputSize, renderOutput);
spWCvMatToOpOutput = std::make_shared<WCvMatToOpOutput<TDatumsPtr>>(cvMatToOpOutput);
// Pose extractor(s)
if (wrapperStructPose.enable)
{
// Pose extractor(s)
spWPoses.resize(poseExtractors.size());
for (auto i = 0u; i < spWPoses.size(); i++)
spWPoses.at(i) = {std::make_shared<WPoseExtractor<TDatumsPtr>>(poseExtractors.at(i))};
}
else
spWPoses.resize(gpuNumber);
// Face extractor(s)
......@@ -693,7 +702,9 @@ namespace op
{
// 1 FaceDetectorOpenCV per thread, OpenCV face detector is not thread-safe
const auto faceDetectorOpenCV = std::make_shared<FaceDetectorOpenCV>(modelFolder);
spWPoses.at(gpu).emplace_back(std::make_shared<WFaceDetectorOpenCV<TDatumsPtr>>(faceDetectorOpenCV));
spWPoses.at(gpu).emplace_back(
std::make_shared<WFaceDetectorOpenCV<TDatumsPtr>>(faceDetectorOpenCV)
);
}
}
// Face keypoint extractor
......@@ -701,7 +712,7 @@ namespace op
{
// Face keypoint extractor
const auto netOutputSize = wrapperStructFace.netInputSize;
const auto faceExtractor = std::make_shared<FaceExtractor>(
const auto faceExtractor = std::make_shared<FaceExtractorCaffe>(
wrapperStructFace.netInputSize, netOutputSize, modelFolder,
gpu + gpuNumberStart, wrapperStructPose.heatMapTypes, wrapperStructPose.heatMapScale
);
......@@ -726,7 +737,7 @@ namespace op
spWPoses.at(gpu).emplace_back(std::make_shared<WHandDetector<TDatumsPtr>>(handDetector));
// Hand keypoint extractor
const auto netOutputSize = wrapperStructHand.netInputSize;
const auto handExtractor = std::make_shared<HandExtractor>(
const auto handExtractor = std::make_shared<HandExtractorCaffe>(
wrapperStructHand.netInputSize, netOutputSize, modelFolder,
gpu + gpuNumberStart, wrapperStructHand.scalesNumber, wrapperStructHand.scaleRange,
wrapperStructPose.heatMapTypes, wrapperStructPose.heatMapScale
......@@ -884,7 +895,8 @@ namespace op
if (!wrapperStructOutput.writeVideo.empty() && wrapperStructInput.producerSharedPtr != nullptr)
{
if (finalOutputSize.x <= 0 || finalOutputSize.y <= 0)
error("Video can only be recorded if outputSize is known.", __LINE__, __FUNCTION__, __FILE__);
error("Video can only be recorded if outputSize is fixed (e.g. video, webcam, IP camera),"
"but not for a image directory.", __LINE__, __FUNCTION__, __FILE__);
const auto originalVideoFps = (wrapperStructInput.producerSharedPtr->get(CV_CAP_PROP_FPS) > 0.
? wrapperStructInput.producerSharedPtr->get(CV_CAP_PROP_FPS) : 30.);
const auto videoSaver = std::make_shared<VideoSaver>(
......@@ -1103,6 +1115,7 @@ namespace op
// Reset
mUserInputWs.clear();
wDatumProducer = nullptr;
spWScaleAndSizeExtractor = nullptr;
spWCvMatToOpInput = nullptr;
spWCvMatToOpOutput = nullptr;
spWPoses.clear();
......@@ -1126,7 +1139,7 @@ namespace op
// The less number of queues -> the less lag
// Security checks
if (spWCvMatToOpInput == nullptr || spWCvMatToOpOutput == nullptr)
if (spWScaleAndSizeExtractor == nullptr || spWCvMatToOpInput == nullptr)
error("Configure the Wrapper class before calling `start()`.", __LINE__, __FUNCTION__, __FILE__);
if ((wDatumProducer == nullptr) == (mUserInputWs.empty())
&& mThreadManagerMode != ThreadManagerMode::Asynchronous
......@@ -1159,8 +1172,12 @@ namespace op
mThreadManager.add(mThreadId, mUserInputWs, queueIn++, queueOut++);
threadIdPP();
// Thread 1, queues 1 -> 2
mThreadManager.add(mThreadId, {spWIdGenerator, spWCvMatToOpInput, spWCvMatToOpOutput}, queueIn++,
queueOut++);
if (spWCvMatToOpOutput == nullptr)
mThreadManager.add(mThreadId, {spWIdGenerator, spWScaleAndSizeExtractor, spWCvMatToOpInput},
queueIn++, queueOut++);
else
mThreadManager.add(mThreadId, {spWIdGenerator, spWScaleAndSizeExtractor, spWCvMatToOpInput,
spWCvMatToOpOutput}, queueIn++, queueOut++);
}
// If custom user Worker in same thread or producer on same thread
else
......@@ -1177,7 +1194,12 @@ namespace op
&& mThreadManagerMode != ThreadManagerMode::AsynchronousIn)
error("No input selected.", __LINE__, __FUNCTION__, __FILE__);
workersAux = mergeWorkers(workersAux, {spWIdGenerator, spWCvMatToOpInput, spWCvMatToOpOutput});
if (spWCvMatToOpOutput == nullptr)
workersAux = mergeWorkers(workersAux, {spWIdGenerator, spWScaleAndSizeExtractor,
spWCvMatToOpInput});
else
workersAux = mergeWorkers(workersAux, {spWIdGenerator, spWScaleAndSizeExtractor,
spWCvMatToOpInput, spWCvMatToOpOutput});
// Thread 0 or 1, queues 0 -> 1
mThreadManager.add(mThreadId, workersAux, queueIn++, queueOut++);
}
......
......@@ -19,7 +19,8 @@ cuda_add_library(core
renderer.cpp
resizeAndMergeBase.cpp
resizeAndMergeBase.cu
resizeAndMergeCaffe.cpp)
resizeAndMergeCaffe.cpp
scaleAndSizeExtractor)
target_link_libraries(core ${Caffe_LIBS})
if (BUILD_CAFFE)
......
......@@ -4,24 +4,9 @@
namespace op
{
CvMatToOpInput::CvMatToOpInput(const Point<int>& netInputResolution, const int scaleNumber, const float scaleGap) :
mScaleNumber{scaleNumber},
mScaleGap{scaleGap},
mInputNetSize4D{{mScaleNumber, 3, netInputResolution.y, netInputResolution.x}}
{
try
{
// Security checks
if (netInputResolution.x % 16 != 0 || netInputResolution.y % 16 != 0)
error("Net input resolution must be multiples of 16.", __LINE__, __FUNCTION__, __FILE__);
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
std::pair<Array<float>, std::vector<float>> CvMatToOpInput::format(const cv::Mat& cvInputData) const
Array<float> CvMatToOpInput::createArray(const cv::Mat& cvInputData,
const std::vector<double>& scaleInputToNetInputs,
const std::vector<Point<int>>& netInputSizes) const
{
try
{
......@@ -30,38 +15,26 @@ namespace op
error("Wrong input element (empty cvInputData).", __LINE__, __FUNCTION__, __FILE__);
if (cvInputData.channels() != 3)
error("Input images must be 3-channel BGR.", __LINE__, __FUNCTION__, __FILE__);
if (scaleInputToNetInputs.size() != netInputSizes.size())
error("scaleInputToNetInputs.size() != netInputSizes.size().", __LINE__, __FUNCTION__, __FILE__);
// inputNetData - Reescale keeping aspect ratio and transform to float the input deep net image
Array<float> inputNetData{mInputNetSize4D};
std::vector<float> scaleRatios(mScaleNumber, 1.f);
const auto numberScales = (int)scaleInputToNetInputs.size();
Array<float> inputNetData{{numberScales, 3, netInputSizes.at(0).y, netInputSizes.at(0).x}};
std::vector<double> scaleRatios(numberScales, 1.f);
const auto inputNetDataOffset = inputNetData.getVolume(1, 3);
for (auto i = 0; i < mScaleNumber; i++)
for (auto i = 0; i < numberScales; i++)
{
const auto currentScale = 1.f - i*mScaleGap;
if (currentScale < 0.f || 1.f < currentScale)
error("All scales must be in the range [0, 1], i.e. 0 <= 1-scale_number*scale_gap <= 1", __LINE__, __FUNCTION__, __FILE__);
const auto netInputWidth = inputNetData.getSize(3);
const auto targetWidth = fastTruncate(intRound(netInputWidth * currentScale) / 16 * 16, 1, netInputWidth);
const auto netInputHeight = inputNetData.getSize(2);
const auto targetHeight = fastTruncate(intRound(netInputHeight * currentScale) / 16 * 16, 1, netInputHeight);
const Point<int> targetSize{targetWidth, targetHeight};
const auto scale = resizeGetScaleFactor(Point<int>{cvInputData.cols, cvInputData.rows}, targetSize);
const cv::Mat frameWithNetSize = resizeFixedAspectRatio(cvInputData, scale, Point<int>{netInputWidth, netInputHeight});
const cv::Mat frameWithNetSize = resizeFixedAspectRatio(cvInputData, scaleInputToNetInputs[i],
netInputSizes[i]);
// Fill inputNetData
uCharCvMatToFloatPtr(inputNetData.getPtr() + i * inputNetDataOffset, frameWithNetSize, true);
// Fill scaleRatios
scaleRatios[i] = {(float)scale};
if (i > 0)
scaleRatios[i] /= scaleRatios[0];
}
scaleRatios.at(0) /= scaleRatios[0];
return std::make_pair(inputNetData, scaleRatios);
return inputNetData;
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
return std::make_pair(Array<float>{}, std::vector<float>{});
return Array<float>{};
}
}
}
......@@ -3,13 +3,7 @@
namespace op
{
CvMatToOpOutput::CvMatToOpOutput(const Point<int>& outputResolution, const bool generateOutput) :
mGenerateOutput{generateOutput},
mOutputSize3D{3, outputResolution.y, outputResolution.x}
{
}
std::tuple<double, Array<float>> CvMatToOpOutput::format(const cv::Mat& cvInputData) const
Array<float> CvMatToOpOutput::createArray(const cv::Mat& cvInputData, const double scaleInputToOutput, const Point<int>& outputResolution) const
{
try
{
......@@ -18,38 +12,18 @@ namespace op
error("Wrong input element (empty cvInputData).", __LINE__, __FUNCTION__, __FILE__);
if (cvInputData.channels() != 3)
error("Input images must be 3-channel BGR.", __LINE__, __FUNCTION__, __FILE__);
// scaleInputToOutput - Scale between input and desired output size
double scaleInputToOutput;
Point<int> outputResolution;
// Output = mOutputSize3D size
if (mOutputSize3D[1] > 0 && mOutputSize3D[2] > 0)
{
outputResolution = Point<int>{mOutputSize3D[2], mOutputSize3D[1]};
scaleInputToOutput = resizeGetScaleFactor(Point<int>{cvInputData.cols, cvInputData.rows},
outputResolution);
}
// Output = input size
else
{
outputResolution = Point<int>{cvInputData.cols, cvInputData.rows};
scaleInputToOutput = 1.;
}
// outputData - Reescale keeping aspect ratio and transform to float the output image
Array<float> outputData;
if (mGenerateOutput)
{
const cv::Mat frameWithOutputSize = resizeFixedAspectRatio(cvInputData, scaleInputToOutput,
outputResolution);
outputData.reset({3, outputResolution.y, outputResolution.x});
uCharCvMatToFloatPtr(outputData.getPtr(), frameWithOutputSize, false);
}
const cv::Mat frameWithOutputSize = resizeFixedAspectRatio(cvInputData, scaleInputToOutput,
outputResolution);
Array<float> outputData({3, outputResolution.y, outputResolution.x});
uCharCvMatToFloatPtr(outputData.getPtr(), frameWithOutputSize, false);
// Return result
return std::make_tuple(scaleInputToOutput, outputData);
return outputData;
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
return std::make_tuple(0., Array<float>{});
return Array<float>{};
}
}
}
......@@ -25,9 +25,10 @@ namespace op
handRectangles{datum.handRectangles},
handKeypoints(datum.handKeypoints), // Parentheses instead of braces to avoid error in GCC 4.8
// Other parameters
scaleInputToNetInputs{datum.scaleInputToNetInputs},
netInputSizes{datum.netInputSizes},
scaleInputToOutput{datum.scaleInputToOutput},
scaleNetToOutput{datum.scaleNetToOutput},
scaleRatios{datum.scaleRatios},
elementRendered{datum.elementRendered}
{
}
......@@ -53,9 +54,10 @@ namespace op
handRectangles = datum.handRectangles,
handKeypoints = datum.handKeypoints,
// Other parameters
scaleInputToNetInputs = datum.scaleInputToNetInputs;
netInputSizes = datum.netInputSizes;
scaleInputToOutput = datum.scaleInputToOutput;
scaleNetToOutput = datum.scaleNetToOutput;
scaleRatios = datum.scaleRatios;
elementRendered = datum.elementRendered;
// Return
return *this;
......@@ -92,7 +94,8 @@ namespace op
std::swap(handRectangles, datum.handRectangles);
std::swap(handKeypoints, datum.handKeypoints);
// Other parameters
std::swap(scaleRatios, datum.scaleRatios);
std::swap(scaleInputToNetInputs, datum.scaleInputToNetInputs);
std::swap(netInputSizes, datum.netInputSizes);
std::swap(elementRendered, datum.elementRendered);
}
catch (const std::exception& e)
......@@ -122,9 +125,8 @@ namespace op
std::swap(handRectangles, datum.handRectangles);
std::swap(handKeypoints, datum.handKeypoints);
// Other parameters
scaleInputToOutput = datum.scaleInputToOutput;
scaleNetToOutput = datum.scaleNetToOutput;
std::swap(scaleRatios, datum.scaleRatios);
std::swap(scaleInputToNetInputs, datum.scaleInputToNetInputs);
std::swap(netInputSizes, datum.netInputSizes);
std::swap(elementRendered, datum.elementRendered);
// Return
return *this;
......@@ -163,9 +165,10 @@ namespace op
datum.handKeypoints[0] = handKeypoints[0].clone();
datum.handKeypoints[1] = handKeypoints[1].clone();
// Other parameters
datum.scaleInputToNetInputs = scaleInputToNetInputs;
datum.netInputSizes = netInputSizes;
datum.scaleInputToOutput = scaleInputToOutput;
datum.scaleNetToOutput = scaleNetToOutput;
datum.scaleRatios = scaleRatios;
datum.elementRendered = elementRendered;
// Return
return std::move(datum);
......
......@@ -6,4 +6,5 @@ namespace op
DEFINE_TEMPLATE_DATUM(WCvMatToOpOutput);
DEFINE_TEMPLATE_DATUM(WKeypointScaler);
DEFINE_TEMPLATE_DATUM(WOpOutputToCvMat);
DEFINE_TEMPLATE_DATUM(WScaleAndSizeExtractor);
}
#ifndef CPU_ONLY
#ifdef USE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
......@@ -6,26 +6,26 @@
namespace op
{
void checkAndIncreaseGpuMemory(std::shared_ptr<float*>& gpuMemoryPtr,
std::shared_ptr<std::atomic<unsigned long long>>& currentVolumePtr,
const unsigned long long memoryVolume)
{
try
#ifdef USE_CUDA
void checkAndIncreaseGpuMemory(std::shared_ptr<float*>& gpuMemoryPtr,
std::shared_ptr<std::atomic<unsigned long long>>& currentVolumePtr,
const unsigned long long memoryVolume)
{
#ifndef CPU_ONLY
try
{
if (*currentVolumePtr < memoryVolume)
{
*currentVolumePtr = memoryVolume;
cudaFree(*gpuMemoryPtr);
cudaMalloc((void**)(gpuMemoryPtr.get()), *currentVolumePtr * sizeof(float));
}
#endif
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
#endif
GpuRenderer::GpuRenderer(const float renderThreshold, const float alphaKeypoint,
const float alphaHeatMap, const bool blendOriginalFrame,
......@@ -44,7 +44,7 @@ namespace op
{
try
{
#ifndef CPU_ONLY
#ifdef USE_CUDA
if (mIsLastRenderer)
cudaFree(*spGpuMemory);
#endif
......@@ -97,7 +97,7 @@ namespace op
{
try
{
#ifndef CPU_ONLY
#ifdef USE_CUDA
if (!*spGpuMemoryAllocated)
{
checkAndIncreaseGpuMemory(spGpuMemory, spVolume, memoryVolume);
......@@ -105,8 +105,10 @@ namespace op
*spGpuMemoryAllocated = true;
}
#else
error("GPU rendering not available if `CPU_ONLY` is set.", __LINE__, __FUNCTION__, __FILE__);
UNUSED(cpuMemory);
UNUSED(memoryVolume);
error("OpenPose must be compiled with the `USE_CUDA` macro definitions in order to run this"
" functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
......@@ -119,7 +121,7 @@ namespace op
{
try
{
#ifndef CPU_ONLY
#ifdef USE_CUDA
if (*spGpuMemoryAllocated && mIsLastRenderer)
{
if (*spVolume < memoryVolume)
......@@ -129,8 +131,10 @@ namespace op
*spGpuMemoryAllocated = false;
}
#else
error("GPU rendering not available if `CPU_ONLY` is set.", __LINE__, __FUNCTION__, __FILE__);
UNUSED(cpuMemory);
UNUSED(memoryVolume);
error("OpenPose must be compiled with the `USE_CUDA` macro definitions in order to run this"
" functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
......
......@@ -8,7 +8,8 @@ namespace op
{
}
void KeypointScaler::scale(Array<float>& arrayToScale, const float scaleInputToOutput, const float scaleNetToOutput, const Point<int>& producerSize) const
void KeypointScaler::scale(Array<float>& arrayToScale, const double scaleInputToOutput,
const double scaleNetToOutput, const Point<int>& producerSize) const
{
try
{
......@@ -21,7 +22,8 @@ namespace op
}
}
void KeypointScaler::scale(std::vector<Array<float>>& arrayToScalesToScale, const float scaleInputToOutput, const float scaleNetToOutput, const Point<int>& producerSize) const
void KeypointScaler::scale(std::vector<Array<float>>& arrayToScalesToScale, const double scaleInputToOutput,
const double scaleNetToOutput, const Point<int>& producerSize) const
{
try
{
......@@ -30,15 +32,15 @@ namespace op
// InputResolution
if (mScaleMode == ScaleMode::InputResolution)
for (auto& arrayToScale : arrayToScalesToScale)
scaleKeypoints(arrayToScale, 1.f/scaleInputToOutput);
scaleKeypoints(arrayToScale, float(1./scaleInputToOutput));
// NetOutputResolution
else if (mScaleMode == ScaleMode::NetOutputResolution)
for (auto& arrayToScale : arrayToScalesToScale)
scaleKeypoints(arrayToScale, 1.f/scaleNetToOutput);
scaleKeypoints(arrayToScale, float(1./scaleNetToOutput));
// [0,1]
else if (mScaleMode == ScaleMode::ZeroToOne)
{
const auto scale = 1.f/scaleInputToOutput;
const auto scale = float(1./scaleInputToOutput);
const auto scaleX = scale / ((float)producerSize.x - 1.f);
const auto scaleY = scale / ((float)producerSize.y - 1.f);
for (auto& arrayToScale : arrayToScalesToScale)
......@@ -47,7 +49,7 @@ namespace op
// [-1,1]
else if (mScaleMode == ScaleMode::PlusMinusOne)
{
const auto scale = 2.f/scaleInputToOutput;
const auto scale = float(2./scaleInputToOutput);
const auto scaleX = (scale / ((float)producerSize.x - 1.f));
const auto scaleY = (scale / ((float)producerSize.y - 1.f));
const auto offset = -1.f;
......
......@@ -4,12 +4,12 @@
namespace op
{
template <typename T>
void maximumCpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize)
void maximumCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize)
{
try
{
UNUSED(targetPtr);
UNUSED(kernelPtr);
UNUSED(sourcePtr);
UNUSED(targetSize);
UNUSED(sourceSize);
......@@ -25,14 +25,14 @@ namespace op
// const auto numberParts = targetSize[2];
// const auto numberSubparts = targetSize[3];
// // log("sourceSize[0]: " + std::to_string(sourceSize[0])); // = 1
// // log("sourceSize[1]: " + std::to_string(sourceSize[1])); // = #body parts + bck = 22 (hands) or 71 (face)
// // log("sourceSize[2]: " + std::to_string(sourceSize[2])); // = 368 = height
// // log("sourceSize[3]: " + std::to_string(sourceSize[3])); // = 368 = width
// // log("targetSize[0]: " + std::to_string(targetSize[0])); // = 1
// // log("targetSize[1]: " + std::to_string(targetSize[1])); // = 1
// // log("targetSize[2]: " + std::to_string(targetSize[2])); // = 21(hands) or 70 (face)
// // log("targetSize[3]: " + std::to_string(targetSize[3])); // = 3 = [x, y, score]
// // log("sourceSize[0]: " + std::to_string(sourceSize[0])); // = 1
// // log("sourceSize[1]: " + std::to_string(sourceSize[1])); // = #body_parts+bck=22(hands) or 71(face)
// // log("sourceSize[2]: " + std::to_string(sourceSize[2])); // = 368 = height
// // log("sourceSize[3]: " + std::to_string(sourceSize[3])); // = 368 = width
// // log("targetSize[0]: " + std::to_string(targetSize[0])); // = 1
// // log("targetSize[1]: " + std::to_string(targetSize[1])); // = 1
// // log("targetSize[2]: " + std::to_string(targetSize[2])); // = 21(hands) or 70 (face)
// // log("targetSize[3]: " + std::to_string(targetSize[3])); // = 3 = [x, y, score]
// // log(" ");
// for (auto n = 0; n < num; n++)
// {
......@@ -45,7 +45,8 @@ namespace op
// auto* targetPtrOffsetted = targetPtr + (offsetChannel + part) * numberSubparts;
// const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset;
// // Option a - 6.3 fps
// const auto sourceIndexIterator = thrust::max_element(thrust::host, sourcePtrOffsetted, sourcePtrOffsetted + imageOffset);
// const auto sourceIndexIterator = thrust::max_element(thrust::host, sourcePtrOffsetted,
// sourcePtrOffsetted + imageOffset);
// const auto sourceIndex = (int)(sourceIndexIterator - sourcePtrOffsetted);
// targetPtrOffsetted[0] = sourceIndex % width;
// targetPtrOffsetted[1] = sourceIndex / width;
......@@ -60,6 +61,8 @@ namespace op
}
}
template void maximumCpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize);
template void maximumCpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize);
template void maximumCpu(float* targetPtr, const float* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize);
template void maximumCpu(double* targetPtr, const double* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize);
}
#ifdef USE_CAFFE
#include <caffe/blob.hpp>
#endif
#include <openpose/core/maximumBase.hpp>
#include <openpose/core/maximumCaffe.hpp>
......@@ -7,17 +9,34 @@ namespace op
// Default constructor.
// MaximumCaffe is only functional when OpenPose is built with Caffe support;
// without USE_CAFFE it fails fast at construction time instead of at first use.
template <typename T>
MaximumCaffe<T>::MaximumCaffe()
{
    try
    {
        #ifndef USE_CAFFE
            // No Caffe at build time -> the class cannot do anything useful; abort construction.
            error("OpenPose must be compiled with the `USE_CAFFE` macro definition in order to use this"
                  " functionality.", __LINE__, __FUNCTION__, __FILE__);
        #endif
    }
    catch (const std::exception& e)
    {
        // Re-raise through the project-wide error handler with location info.
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
template <typename T>
void MaximumCaffe<T>::LayerSetUp(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
void MaximumCaffe<T>::LayerSetUp(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
{
try
{
if (top.size() != 1)
error("top.size() != 1", __LINE__, __FUNCTION__, __FILE__);
if (bottom.size() != 1)
error("bottom.size() != 1", __LINE__, __FUNCTION__, __FILE__);
#ifdef USE_CAFFE
if (top.size() != 1)
error("top.size() != 1", __LINE__, __FUNCTION__, __FILE__);
if (bottom.size() != 1)
error("bottom.size() != 1", __LINE__, __FUNCTION__, __FILE__);
#else
UNUSED(bottom);
UNUSED(top);
#endif
}
catch (const std::exception& e)
{
......@@ -26,26 +45,34 @@ namespace op
}
template <typename T>
void MaximumCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
void MaximumCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
{
try
{
auto bottomBlob = bottom.at(0);
auto topBlob = top.at(0);
#ifdef USE_CAFFE
auto bottomBlob = bottom.at(0);
auto topBlob = top.at(0);
// Bottom shape
std::vector<int> bottomShape = bottomBlob->shape();
// Bottom shape
std::vector<int> bottomShape = bottomBlob->shape();
// Top shape
std::vector<int> topShape{bottomShape};
topShape[1] = 1; // Unnecessary
topShape[2] = bottomShape[1]-1; // Number parts + bck - 1
topShape[3] = 3; // X, Y, score
topBlob->Reshape(topShape);
// Top shape
std::vector<int> topShape{bottomShape};
topShape[1] = 1; // Unnecessary
topShape[2] = bottomShape[1]-1; // Number parts + bck - 1
topShape[3] = 3; // X, Y, score
topBlob->Reshape(topShape);
// Array sizes
mTopSize = std::array<int, 4>{topBlob->shape(0), topBlob->shape(1), topBlob->shape(2), topBlob->shape(3)};
mBottomSize = std::array<int, 4>{bottomBlob->shape(0), bottomBlob->shape(1), bottomBlob->shape(2), bottomBlob->shape(3)};
// Array sizes
mTopSize = std::array<int, 4>{topBlob->shape(0), topBlob->shape(1), topBlob->shape(2),
topBlob->shape(3)};
mBottomSize = std::array<int, 4>{bottomBlob->shape(0), bottomBlob->shape(1), bottomBlob->shape(2),
bottomBlob->shape(3)};
#else
UNUSED(bottom);
UNUSED(top);
#endif
}
catch (const std::exception& e)
{
......@@ -54,11 +81,17 @@ namespace op
}
template <typename T>
void MaximumCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
void MaximumCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
{
try
{
maximumGpu(top.at(0)->mutable_cpu_data(), bottom.at(0)->cpu_data(), mTopSize, mBottomSize);
#ifdef USE_CAFFE
maximumCpu(top.at(0)->mutable_cpu_data(), bottom.at(0)->cpu_data(), mTopSize, mBottomSize);
#else
UNUSED(bottom);
UNUSED(top);
#endif
}
catch (const std::exception& e)
{
......@@ -67,11 +100,19 @@ namespace op
}
template <typename T>
void MaximumCaffe<T>::Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
void MaximumCaffe<T>::Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
{
try
{
maximumGpu(top.at(0)->mutable_gpu_data(), bottom.at(0)->gpu_data(), mTopSize, mBottomSize);
#if defined USE_CAFFE && defined USE_CUDA
maximumGpu(top.at(0)->mutable_gpu_data(), bottom.at(0)->gpu_data(), mTopSize, mBottomSize);
#else
UNUSED(bottom);
UNUSED(top);
error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
" this functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
{
......@@ -80,14 +121,18 @@ namespace op
}
template <typename T>
void MaximumCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom)
void MaximumCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top,
const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom)
{
try
{
UNUSED(top);
UNUSED(propagate_down);
UNUSED(bottom);
NOT_IMPLEMENTED;
#ifdef USE_CAFFE
NOT_IMPLEMENTED;
#endif
}
catch (const std::exception& e)
{
......@@ -96,14 +141,20 @@ namespace op
}
template <typename T>
void MaximumCaffe<T>::Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom)
void MaximumCaffe<T>::Backward_gpu(const std::vector<caffe::Blob<T>*>& top,
const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom)
{
try
{
UNUSED(top);
UNUSED(propagate_down);
UNUSED(bottom);
NOT_IMPLEMENTED;
#ifdef USE_CAFFE
#ifdef USE_CAFFE
NOT_IMPLEMENTED;
#endif
#endif
}
catch (const std::exception& e)
{
......@@ -111,7 +162,5 @@ namespace op
}
}
INSTANTIATE_CLASS(MaximumCaffe);
COMPILE_TEMPLATE_FLOATING_TYPES_CLASS(MaximumCaffe);
}
#endif
#ifdef USE_CAFFE
#include <numeric> // std::accumulate
#ifdef USE_CAFFE
#include <caffe/net.hpp>
#endif
#include <openpose/utilities/cuda.hpp>
#include <openpose/core/netCaffe.hpp>
namespace op
{
NetCaffe::NetCaffe(const std::array<int, 4>& netInputSize4D, const std::string& caffeProto, const std::string& caffeTrainedModel, const int gpuId, const std::string& lastBlobName) :
mGpuId{gpuId},
// mNetInputSize4D{netInputSize4D}, // This line crashes on some devices with old G++
mNetInputSize4D{netInputSize4D[0], netInputSize4D[1], netInputSize4D[2], netInputSize4D[3]},
mNetInputMemory{std::accumulate(mNetInputSize4D.begin(), mNetInputSize4D.end(), 1, std::multiplies<int>()) * sizeof(float)},
mCaffeProto{caffeProto},
mCaffeTrainedModel{caffeTrainedModel},
mLastBlobName{lastBlobName}
// PImpl struct for NetCaffe: keeps all Caffe/boost types out of the public header
// so that client code can be built without the Caffe (and CUDA) headers.
// The whole body is compiled away when USE_CAFFE is undefined.
struct NetCaffe::ImplNetCaffe
{
    #ifdef USE_CAFFE
        // Initialized in the constructor (immutable afterwards)
        const int mGpuId;
        const std::array<int, 4> mNetInputSize4D;       // NCHW dimensions of the net input blob
        const unsigned long mNetInputMemory;            // input blob size in bytes (floats)
        const std::string mCaffeProto;                  // path to deploy prototxt
        const std::string mCaffeTrainedModel;           // path to .caffemodel weights
        const std::string mLastBlobName;                // name of the output blob to read results from
        // Initialized lazily on the worker thread (see initializationOnThread)
        std::unique_ptr<caffe::Net<float>> upCaffeNet;
        boost::shared_ptr<caffe::Blob<float>> spOutputBlob;

        ImplNetCaffe(const std::array<int, 4>& netInputSize4D, const std::string& caffeProto,
                     const std::string& caffeTrainedModel, const int gpuId, const std::string& lastBlobName) :
            mGpuId{gpuId},
            // mNetInputSize4D{netInputSize4D}, // This line crashes on some devices with old G++
            mNetInputSize4D{netInputSize4D[0], netInputSize4D[1], netInputSize4D[2], netInputSize4D[3]},
            // Byte count = product of the 4 blob dimensions times sizeof(float)
            mNetInputMemory{sizeof(float) * std::accumulate(mNetInputSize4D.begin(), mNetInputSize4D.end(), 1,
                                                            std::multiplies<int>())},
            mCaffeProto{caffeProto},
            mCaffeTrainedModel{caffeTrainedModel},
            mLastBlobName{lastBlobName}
        {
        }
    #endif
};
// Constructor: with Caffe support it only stores the configuration in the PImpl
// (the heavy network loading is deferred to the worker thread); without Caffe
// support it errors out immediately.
// @param netInputSize4D   NCHW dimensions of the network input blob.
// @param caffeProto       Path to the deploy prototxt file.
// @param caffeTrainedModel Path to the trained .caffemodel file.
// @param gpuId            GPU device index to run the net on.
// @param lastBlobName     Name of the blob that holds the network output.
NetCaffe::NetCaffe(const std::array<int, 4>& netInputSize4D, const std::string& caffeProto,
                   const std::string& caffeTrainedModel, const int gpuId, const std::string& lastBlobName)
    #ifdef USE_CAFFE
        // Member init list itself is conditional: upImpl only exists in Caffe builds.
        : upImpl{new ImplNetCaffe{netInputSize4D, caffeProto, caffeTrainedModel, gpuId, lastBlobName}}
    #endif
{
    try
    {
        #ifndef USE_CAFFE
            // Silence unused-parameter warnings in non-Caffe builds before failing.
            UNUSED(netInputSize4D);
            UNUSED(caffeProto);
            UNUSED(caffeTrainedModel);
            UNUSED(gpuId);
            UNUSED(lastBlobName);
            error("OpenPose must be compiled with the `USE_CAFFE` macro definition in order to use this"
                  " functionality.", __LINE__, __FUNCTION__, __FILE__);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
NetCaffe::~NetCaffe()
......@@ -24,19 +68,23 @@ namespace op
{
try
{
// Initialize net
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(mGpuId);
upCaffeNet.reset(new caffe::Net<float>{mCaffeProto, caffe::TEST});
upCaffeNet->CopyTrainedLayersFrom(mCaffeTrainedModel);
upCaffeNet->blobs()[0]->Reshape({mNetInputSize4D[0], mNetInputSize4D[1], mNetInputSize4D[2], mNetInputSize4D[3]});
upCaffeNet->Reshape();
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// Set spOutputBlob
spOutputBlob = upCaffeNet->blob_by_name(mLastBlobName);
if (spOutputBlob == nullptr)
error("The output blob is a nullptr. Did you use the same name than the prototxt? (Used: " + mLastBlobName + ").", __LINE__, __FUNCTION__, __FILE__);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#ifdef USE_CAFFE
// Initialize net
caffe::Caffe::set_mode(caffe::Caffe::GPU);
caffe::Caffe::SetDevice(upImpl->mGpuId);
upImpl->upCaffeNet.reset(new caffe::Net<float>{upImpl->mCaffeProto, caffe::TEST});
upImpl->upCaffeNet->CopyTrainedLayersFrom(upImpl->mCaffeTrainedModel);
upImpl->upCaffeNet->blobs()[0]->Reshape({upImpl->mNetInputSize4D[0], upImpl->mNetInputSize4D[1],
upImpl->mNetInputSize4D[2], upImpl->mNetInputSize4D[3]});
upImpl->upCaffeNet->Reshape();
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// Set spOutputBlob
upImpl->spOutputBlob = upImpl->upCaffeNet->blob_by_name(upImpl->mLastBlobName);
if (upImpl->spOutputBlob == nullptr)
error("The output blob is a nullptr. Did you use the same name than the prototxt? (Used: "
+ upImpl->mLastBlobName + ").", __LINE__, __FUNCTION__, __FILE__);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
{
......@@ -48,7 +96,11 @@ namespace op
{
try
{
return upCaffeNet->blobs().at(0)->mutable_cpu_data();
#ifdef USE_CAFFE
return upImpl->upCaffeNet->blobs().at(0)->mutable_cpu_data();
#else
return nullptr;
#endif
}
catch (const std::exception& e)
{
......@@ -61,7 +113,11 @@ namespace op
{
try
{
return upCaffeNet->blobs().at(0)->mutable_gpu_data();
#ifdef USE_CAFFE
return upImpl->upCaffeNet->blobs().at(0)->mutable_gpu_data();
#else
return nullptr;
#endif
}
catch (const std::exception& e)
{
......@@ -74,15 +130,26 @@ namespace op
{
try
{
// Copy frame data to GPU memory
if (inputData != nullptr)
{
auto* gpuImagePtr = upCaffeNet->blobs().at(0)->mutable_gpu_data();
cudaMemcpy(gpuImagePtr, inputData, mNetInputMemory, cudaMemcpyHostToDevice);
}
// Perform deep network forward pass
upCaffeNet->ForwardFrom(0);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#ifdef USE_CAFFE
// Copy frame data to GPU memory
if (inputData != nullptr)
{
#ifdef USE_CUDA
auto* gpuImagePtr = upImpl->upCaffeNet->blobs().at(0)->mutable_gpu_data();
cudaMemcpy(gpuImagePtr, inputData, upImpl->mNetInputMemory, cudaMemcpyHostToDevice);
#else
auto* cpuImagePtr = upImpl->upCaffeNet->blobs().at(0)->mutable_cpu_data();
std::copy(inputData,
inputData + upImpl->mNetInputMemory/sizeof(float),
cpuImagePtr);
#endif
}
// Perform deep network forward pass
upImpl->upCaffeNet->ForwardFrom(0);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#else
UNUSED(inputData);
#endif
}
catch (const std::exception& e)
{
......@@ -94,7 +161,11 @@ namespace op
{
try
{
return spOutputBlob;
#ifdef USE_CAFFE
return upImpl->spOutputBlob;
#else
return nullptr;
#endif
}
catch (const std::exception& e)
{
......@@ -103,5 +174,3 @@ namespace op
}
}
}
#endif
#ifdef USE_CAFFE
#include <caffe/blob.hpp>
#endif
#include <openpose/core/nmsBase.hpp>
#include <openpose/core/nmsCaffe.hpp>
namespace op
{
template <typename T>
NmsCaffe<T>::NmsCaffe()
struct NmsCaffe<T>::ImplNmsCaffe
{
#ifdef USE_CAFFE
caffe::Blob<int> mKernelBlob;
std::array<int, 4> mBottomSize;
std::array<int, 4> mTopSize;
#endif
ImplNmsCaffe(){};
};
// Constructor: allocates the PImpl holder; in non-Caffe builds it fails fast,
// since NMS is implemented on top of Caffe blobs.
template <typename T>
NmsCaffe<T>::NmsCaffe() :
    upImpl{new ImplNmsCaffe{}}
{
    try
    {
        #ifndef USE_CAFFE
            // No Caffe at build time -> refuse construction with an explanatory error.
            error("OpenPose must be compiled with the `USE_CAFFE` macro definition in order to use this"
                  " functionality.", __LINE__, __FUNCTION__, __FILE__);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// Out-of-line (empty) destructor: presumably required so that the smart pointer to
// the incomplete ImplNmsCaffe type is destroyed where the type is fully defined
// (standard PImpl idiom) — TODO confirm upImpl's declared type in the header.
template <typename T>
NmsCaffe<T>::~NmsCaffe()
{
}
......@@ -14,10 +45,15 @@ namespace op
{
try
{
if (top.size() != 1)
error("top.size() != 1", __LINE__, __FUNCTION__, __FILE__);
if (bottom.size() != 1)
error("bottom.size() != 1", __LINE__, __FUNCTION__, __FILE__);
#ifdef USE_CAFFE
if (top.size() != 1)
error("top.size() != 1", __LINE__, __FUNCTION__, __FILE__);
if (bottom.size() != 1)
error("bottom.size() != 1", __LINE__, __FUNCTION__, __FILE__);
#else
UNUSED(bottom);
UNUSED(top);
#endif
}
catch (const std::exception& e)
{
......@@ -26,27 +62,36 @@ namespace op
}
template <typename T>
void NmsCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top, const int maxPeaks)
void NmsCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top,
const int maxPeaks)
{
try
{
auto bottomBlob = bottom.at(0);
auto topBlob = top.at(0);
#ifdef USE_CAFFE
auto bottomBlob = bottom.at(0);
auto topBlob = top.at(0);
// Bottom shape
std::vector<int> bottomShape = bottomBlob->shape();
// Bottom shape
std::vector<int> bottomShape = bottomBlob->shape();
// Top shape
std::vector<int> topShape{bottomShape};
topShape[1] = bottomShape[1]-1; // Number parts + bck - 1
topShape[2] = maxPeaks+1; // # maxPeaks + 1
topShape[3] = 3; // X, Y, score
topBlob->Reshape(topShape);
mKernelBlob.Reshape(bottomShape);
// Top shape
std::vector<int> topShape{bottomShape};
topShape[1] = bottomShape[1]-1; // Number parts + bck - 1
topShape[2] = maxPeaks+1; // # maxPeaks + 1
topShape[3] = 3; // X, Y, score
topBlob->Reshape(topShape);
upImpl->mKernelBlob.Reshape(bottomShape);
// Array sizes
mTopSize = std::array<int, 4>{topBlob->shape(0), topBlob->shape(1), topBlob->shape(2), topBlob->shape(3)};
mBottomSize = std::array<int, 4>{bottomBlob->shape(0), bottomBlob->shape(1), bottomBlob->shape(2), bottomBlob->shape(3)};
// Array sizes
upImpl->mTopSize = std::array<int, 4>{topBlob->shape(0), topBlob->shape(1),
topBlob->shape(2), topBlob->shape(3)};
upImpl->mBottomSize = std::array<int, 4>{bottomBlob->shape(0), bottomBlob->shape(1),
bottomBlob->shape(2), bottomBlob->shape(3)};
#else
UNUSED(bottom);
UNUSED(top);
UNUSED(maxPeaks);
#endif
}
catch (const std::exception& e)
{
......@@ -72,7 +117,13 @@ namespace op
{
try
{
nmsGpu(top.at(0)->mutable_cpu_data(), mKernelBlob.mutable_cpu_data(), bottom.at(0)->cpu_data(), mThreshold, mTopSize, mBottomSize);
#ifdef USE_CAFFE
nmsCpu(top.at(0)->mutable_cpu_data(), upImpl->mKernelBlob.mutable_cpu_data(), bottom.at(0)->cpu_data(),
mThreshold, upImpl->mTopSize, upImpl->mBottomSize);
#else
UNUSED(bottom);
UNUSED(top);
#endif
}
catch (const std::exception& e)
{
......@@ -85,7 +136,15 @@ namespace op
{
try
{
nmsGpu(top.at(0)->mutable_gpu_data(), mKernelBlob.mutable_gpu_data(), bottom.at(0)->gpu_data(), mThreshold, mTopSize, mBottomSize);
#if defined USE_CAFFE && defined USE_CUDA
nmsGpu(top.at(0)->mutable_gpu_data(), upImpl->mKernelBlob.mutable_gpu_data(),
bottom.at(0)->gpu_data(), mThreshold, upImpl->mTopSize, upImpl->mBottomSize);
#else
UNUSED(bottom);
UNUSED(top);
error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
" this functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
{
......@@ -94,14 +153,17 @@ namespace op
}
template <typename T>
void NmsCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom)
void NmsCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom)
{
try
{
UNUSED(top);
UNUSED(propagate_down);
UNUSED(bottom);
NOT_IMPLEMENTED;
#ifdef USE_CAFFE
NOT_IMPLEMENTED;
#endif
}
catch (const std::exception& e)
{
......@@ -110,14 +172,17 @@ namespace op
}
template <typename T>
void NmsCaffe<T>::Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down, const std::vector<caffe::Blob<T>*>& bottom)
void NmsCaffe<T>::Backward_gpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom)
{
try
{
UNUSED(top);
UNUSED(propagate_down);
UNUSED(bottom);
NOT_IMPLEMENTED;
#ifdef USE_CAFFE
NOT_IMPLEMENTED;
#endif
}
catch (const std::exception& e)
{
......@@ -125,7 +190,5 @@ namespace op
}
}
INSTANTIATE_CLASS(NmsCaffe);
COMPILE_TEMPLATE_FLOATING_TYPES_CLASS(NmsCaffe);
}
#endif
......@@ -5,13 +5,13 @@ namespace op
{
template <typename T>
void resizeAndMergeCpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<T>& scaleRatios)
const std::array<int, 4>& sourceSize, const std::vector<T>& scaleInputToNetInputs)
{
try
{
UNUSED(targetPtr);
UNUSED(sourcePtr);
UNUSED(scaleRatios);
UNUSED(scaleInputToNetInputs);
UNUSED(targetSize);
UNUSED(sourceSize);
error("CPU version not completely implemented.", __LINE__, __FUNCTION__, __FILE__);
......@@ -61,7 +61,7 @@ namespace op
}
template void resizeAndMergeCpu(float* targetPtr, const float* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<float>& scaleRatios);
const std::array<int, 4>& sourceSize, const std::vector<float>& scaleInputToNetInputs);
template void resizeAndMergeCpu(double* targetPtr, const double* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<double>& scaleRatios);
const std::array<int, 4>& sourceSize, const std::vector<double>& scaleInputToNetInputs);
}
......@@ -25,7 +25,7 @@ namespace op
}
template <typename T>
__global__ void resizeKernelAndMerge(T* targetPtr, const T* const sourcePtr, const int sourceNumOffset, const int num, const T* scaleRatios,
__global__ void resizeKernelAndMerge(T* targetPtr, const T* const sourcePtr, const int sourceNumOffset, const int num, const T* scaleInputToNetInputs,
const int sourceWidth, const int sourceHeight, const int targetWidth, const int targetHeight)
{
const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
......@@ -38,8 +38,8 @@ namespace op
// targetPixel = -1000.f; // For fastMax
for (auto n = 0; n < num; n++)
{
const auto currentWidth = sourceWidth * scaleRatios[n];
const auto currentHeight = sourceHeight * scaleRatios[n];
const auto currentWidth = sourceWidth * scaleInputToNetInputs[n] / scaleInputToNetInputs[0];
const auto currentHeight = sourceHeight * scaleInputToNetInputs[n] / scaleInputToNetInputs[0];
const auto scaleWidth = targetWidth / currentWidth;
const auto scaleHeight = targetHeight / currentHeight;
......@@ -58,7 +58,7 @@ namespace op
template <typename T>
void resizeAndMergeGpu(T* targetPtr, const T* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<T>& scaleRatios)
const std::array<int, 4>& sourceSize, const std::vector<T>& scaleInputToNetInputs)
{
try
{
......@@ -92,24 +92,24 @@ namespace op
// Multi-scale merging
else
{
// If scale_number > 1 --> scaleRatios must be set
if (scaleRatios.size() != num)
// If scale_number > 1 --> scaleInputToNetInputs must be set
if (scaleInputToNetInputs.size() != num)
error("The scale ratios size must be equal than the number of scales.", __LINE__, __FUNCTION__, __FILE__);
const auto maxScales = 10;
if (scaleRatios.size() > maxScales)
if (scaleInputToNetInputs.size() > maxScales)
error("The maximum number of scales is " + std::to_string(maxScales) + ".", __LINE__, __FUNCTION__, __FILE__);
// Copy scaleRatios
T* scaleRatiosGpuPtr;
cudaMalloc((void**)&scaleRatiosGpuPtr, maxScales * sizeof(T));
cudaMemcpy(scaleRatiosGpuPtr, scaleRatios.data(), scaleRatios.size() * sizeof(T), cudaMemcpyHostToDevice);
// Copy scaleInputToNetInputs
T* scaleInputToNetInputsPtr;
cudaMalloc((void**)&scaleInputToNetInputsPtr, maxScales * sizeof(T));
cudaMemcpy(scaleInputToNetInputsPtr, scaleInputToNetInputs.data(), scaleInputToNetInputs.size() * sizeof(T), cudaMemcpyHostToDevice);
// Perform resize + merging
const auto sourceNumOffset = channels * sourceChannelOffset;
for (auto c = 0 ; c < channels ; c++)
resizeKernelAndMerge<<<numBlocks, threadsPerBlock>>>(targetPtr + c * targetChannelOffset,
sourcePtr + c * sourceChannelOffset, sourceNumOffset,
num, scaleRatiosGpuPtr, sourceWidth, sourceHeight, targetWidth, targetHeight);
num, scaleInputToNetInputsPtr, sourceWidth, sourceHeight, targetWidth, targetHeight);
// Free memory
cudaFree(scaleRatiosGpuPtr);
cudaFree(scaleInputToNetInputsPtr);
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
......@@ -121,7 +121,7 @@ namespace op
}
template void resizeAndMergeGpu(float* targetPtr, const float* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<float>& scaleRatios);
const std::array<int, 4>& sourceSize, const std::vector<float>& scaleInputToNetInputs);
template void resizeAndMergeGpu(double* targetPtr, const double* const sourcePtr, const std::array<int, 4>& targetSize,
const std::array<int, 4>& sourceSize, const std::vector<double>& scaleRatios);
const std::array<int, 4>& sourceSize, const std::vector<double>& scaleInputToNetInputs);
}
#ifdef USE_CAFFE
#include <caffe/blob.hpp>
#endif
#include <openpose/core/resizeAndMergeBase.hpp>
#include <openpose/utilities/fastMath.hpp>
#include <openpose/core/resizeAndMergeCaffe.hpp>
......@@ -9,17 +11,34 @@ namespace op
ResizeAndMergeCaffe<T>::ResizeAndMergeCaffe() :
mScaleRatios{1}
{
try
{
#ifndef USE_CAFFE
error("OpenPose must be compiled with the `USE_CAFFE` macro definition in order to use this"
" functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
template <typename T>
void ResizeAndMergeCaffe<T>::LayerSetUp(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
void ResizeAndMergeCaffe<T>::LayerSetUp(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
{
try
{
if (top.size() != 1)
error("top.size() != 1", __LINE__, __FUNCTION__, __FILE__);
if (bottom.size() != 1)
error("bottom.size() != 2", __LINE__, __FUNCTION__, __FILE__);
#ifdef USE_CAFFE
if (top.size() != 1)
error("top.size() != 1", __LINE__, __FUNCTION__, __FILE__);
if (bottom.size() != 1)
error("bottom.size() != 2", __LINE__, __FUNCTION__, __FILE__);
#else
UNUSED(bottom);
UNUSED(top);
#endif
}
catch (const std::exception& e)
{
......@@ -28,24 +47,34 @@ namespace op
}
template <typename T>
void ResizeAndMergeCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top,
const float factor, const bool mergeFirstDimension)
void ResizeAndMergeCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top, const float factor,
const bool mergeFirstDimension)
{
try
{
auto bottomBlob = bottom.at(0);
auto topBlob = top.at(0);
#ifdef USE_CAFFE
auto bottomBlob = bottom.at(0);
auto topBlob = top.at(0);
// Top shape
auto topShape = bottomBlob->shape();
topShape[0] = (mergeFirstDimension ? 1 : bottomBlob->shape(0));
topShape[2] = intRound(topShape[2] * factor);
topShape[3] = intRound(topShape[3] * factor);
topBlob->Reshape(topShape);
// Top shape
auto topShape = bottomBlob->shape();
topShape[0] = (mergeFirstDimension ? 1 : bottomBlob->shape(0));
topShape[2] = intRound(topShape[2] * factor);
topShape[3] = intRound(topShape[3] * factor);
topBlob->Reshape(topShape);
// Array sizes
mTopSize = std::array<int, 4>{topBlob->shape(0), topBlob->shape(1), topBlob->shape(2), topBlob->shape(3)};
mBottomSize = std::array<int, 4>{bottomBlob->shape(0), bottomBlob->shape(1), bottomBlob->shape(2), bottomBlob->shape(3)};
// Array sizes
mTopSize = std::array<int, 4>{topBlob->shape(0), topBlob->shape(1), topBlob->shape(2),
topBlob->shape(3)};
mBottomSize = std::array<int, 4>{bottomBlob->shape(0), bottomBlob->shape(1),
bottomBlob->shape(2), bottomBlob->shape(3)};
#else
UNUSED(bottom);
UNUSED(top);
UNUSED(factor);
UNUSED(mergeFirstDimension);
#endif
}
catch (const std::exception& e)
{
......@@ -67,11 +96,18 @@ namespace op
}
template <typename T>
void ResizeAndMergeCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
void ResizeAndMergeCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
{
try
{
resizeAndMergeCpu(top.at(0)->mutable_cpu_data(), bottom.at(0)->cpu_data(), mTopSize, mBottomSize, mScaleRatios);
#ifdef USE_CAFFE
resizeAndMergeCpu(top.at(0)->mutable_cpu_data(), bottom.at(0)->cpu_data(), mTopSize, mBottomSize,
mScaleRatios);
#else
UNUSED(bottom);
UNUSED(top);
#endif
}
catch (const std::exception& e)
{
......@@ -80,11 +116,20 @@ namespace op
}
template <typename T>
void ResizeAndMergeCaffe<T>::Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
void ResizeAndMergeCaffe<T>::Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
{
try
{
resizeAndMergeGpu(top.at(0)->mutable_gpu_data(), bottom.at(0)->gpu_data(), mTopSize, mBottomSize, mScaleRatios);
#if defined USE_CAFFE && defined USE_CUDA
resizeAndMergeGpu(top.at(0)->mutable_gpu_data(), bottom.at(0)->gpu_data(), mTopSize, mBottomSize,
mScaleRatios);
#else
UNUSED(bottom);
UNUSED(top);
error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
" this functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
{
......@@ -93,7 +138,8 @@ namespace op
}
template <typename T>
void ResizeAndMergeCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top, const std::vector<bool>& propagate_down,
void ResizeAndMergeCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top,
const std::vector<bool>& propagate_down,
const std::vector<caffe::Blob<T>*>& bottom)
{
try
......@@ -101,7 +147,9 @@ namespace op
UNUSED(top);
UNUSED(propagate_down);
UNUSED(bottom);
NOT_IMPLEMENTED;
#ifdef USE_CAFFE
NOT_IMPLEMENTED;
#endif
}
catch (const std::exception& e)
{
......@@ -118,7 +166,9 @@ namespace op
UNUSED(top);
UNUSED(propagate_down);
UNUSED(bottom);
NOT_IMPLEMENTED;
#ifdef USE_CAFFE
NOT_IMPLEMENTED;
#endif
}
catch (const std::exception& e)
{
......@@ -126,7 +176,5 @@ namespace op
}
}
INSTANTIATE_CLASS(ResizeAndMergeCaffe);
COMPILE_TEMPLATE_FLOATING_TYPES_CLASS(ResizeAndMergeCaffe);
}
#endif
#include <openpose/utilities/fastMath.hpp>
#include <openpose/utilities/openCv.hpp> // resizeGetScaleFactor
#include <openpose/core/scaleAndSizeExtractor.hpp>
namespace op
{
// Constructor: stores the multi-scale pyramid configuration and validates it.
// @param netInputResolution  Desired net input size; each positive dimension must be a multiple of 16.
// @param outputResolution    Desired output size; non-positive values mean "same as input" (see extract()).
// @param scaleNumber         Number of pyramid scales (>= 1).
// @param scaleGap            Scale decrement between consecutive pyramid levels (> 0).
// Throws (via error()) on any invalid argument.
ScaleAndSizeExtractor::ScaleAndSizeExtractor(const Point<int>& netInputResolution,
                                             const Point<int>& outputResolution, const int scaleNumber,
                                             const double scaleGap) :
    mNetInputResolution{netInputResolution},
    mOutputSize{outputResolution},
    mScaleNumber{scaleNumber},
    mScaleGap{scaleGap}
{
    try
    {
        // Security checks
        // Net resolution must align to 16-pixel multiples (CNN stride requirement); negative/zero
        // dimensions are allowed here (auto-sized elsewhere), hence the "> 0" pre-condition.
        if ((netInputResolution.x > 0 && netInputResolution.x % 16 != 0)
            || (netInputResolution.y > 0 && netInputResolution.y % 16 != 0))
            error("Net input resolution must be multiples of 16.", __LINE__, __FUNCTION__, __FILE__);
        if (scaleNumber < 1)
            error("There must be at least 1 scale.", __LINE__, __FUNCTION__, __FILE__);
        if (scaleGap <= 0.)
            error("The gap between scales must be strictly positive.", __LINE__, __FUNCTION__, __FILE__);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
std::tuple<std::vector<double>, std::vector<Point<int>>, double, Point<int>> ScaleAndSizeExtractor::extract(
const Point<int>& inputResolution) const
{
try
{
// Security checks
if (inputResolution.area() <= 0)
error("Wrong input element (empty cvInputData).", __LINE__, __FUNCTION__, __FILE__);
// scaleRatios & sizes - Reescale keeping aspect ratio
std::vector<double> scaleRatios(mScaleNumber, 1.f);
std::vector<Point<int>> sizes(mScaleNumber);
for (auto i = 0; i < mScaleNumber; i++)
{
const auto currentScale = 1. - i*mScaleGap;
if (currentScale < 0. || 1. < currentScale)
error("All scales must be in the range [0, 1], i.e. 0 <= 1-scale_number*scale_gap <= 1",
__LINE__, __FUNCTION__, __FILE__);
const auto targetWidth = fastTruncate(intRound(mNetInputResolution.x * currentScale) / 16 * 16, 1,
mNetInputResolution.x);
const auto targetHeight = fastTruncate(intRound(mNetInputResolution.y * currentScale) / 16 * 16, 1,
mNetInputResolution.y);
const Point<int> targetSize{targetWidth, targetHeight};
scaleRatios[i] = resizeGetScaleFactor(inputResolution, targetSize);
sizes[i] = mNetInputResolution;
}
// scaleInputToOutput - Scale between input and desired output size
Point<int> outputResolution;
double scaleInputToOutput;
// Output = mOutputSize3D size
if (mOutputSize.x > 0 && mOutputSize.y > 0)
{
outputResolution = mOutputSize;
scaleInputToOutput = resizeGetScaleFactor(inputResolution, outputResolution);
}
// Output = input size
else
{
outputResolution = inputResolution;
scaleInputToOutput = 1.;
}
// Return result
return std::make_tuple(scaleRatios, sizes, scaleInputToOutput, outputResolution);
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
return std::make_tuple(std::vector<double>{}, std::vector<Point<int>>{}, 1., Point<int>{});
}
}
}
......@@ -3,6 +3,7 @@ set(SOURCES
faceDetector.cpp
faceDetectorOpenCV.cpp
faceExtractor.cpp
faceExtractorCaffe.cpp
faceCpuRenderer.cpp
faceGpuRenderer.cpp
renderFace.cpp
......
......@@ -108,7 +108,7 @@ namespace op
}
}
std::vector<Rectangle<float>> FaceDetector::detectFaces(const Array<float>& poseKeypoints, const float scaleInputToOutput) const
std::vector<Rectangle<float>> FaceDetector::detectFaces(const Array<float>& poseKeypoints, const double scaleInputToOutput) const
{
try
{
......@@ -119,7 +119,7 @@ namespace op
// Otherwise, get face position(s)
if (!poseKeypoints.empty())
for (auto person = 0 ; person < numberPeople ; person++)
faceRectangles.at(person) = getFaceFromPoseKeypoints(poseKeypoints, person, mNeck, mNose, mLEar, mREar, mLEye, mREye, threshold) / scaleInputToOutput;
faceRectangles.at(person) = getFaceFromPoseKeypoints(poseKeypoints, person, mNeck, mNose, mLEar, mREar, mLEye, mREye, threshold) / (float)scaleInputToOutput;
return faceRectangles;
}
catch (const std::exception& e)
......
#include <opencv2/opencv.hpp> // CV_WARP_INVERSE_MAP, CV_INTER_LINEAR
#include <openpose/core/netCaffe.hpp>
#include <openpose/face/faceParameters.hpp>
#include <openpose/utilities/check.hpp>
#include <openpose/utilities/cuda.hpp>
#include <openpose/utilities/fastMath.hpp>
#include <openpose/utilities/openCv.hpp>
#include <openpose/face/faceExtractor.hpp>
namespace op
{
// Copies the face heat maps for `person` from GPU memory into the CPU-side `heatMaps` array
// and rescales the values according to `heatMapScaleMode`.
// @param heatMaps          Destination array; the slice for `person` is overwritten.
// @param person            Index of the person whose heat maps are copied.
// @param heatMapScaleMode  PlusMinusOne -> [-1,1]; UnsignedChar -> [0,255]; otherwise clamp to [0,1].
// @param heatMapsGpuPtr    Source GPU pointer holding FACE_NUMBER_PARTS channels for this person.
// Fix: removed the dead local `totalOffset`, which was accumulated but never read.
void updateFaceHeatMapsForPerson(Array<float>& heatMaps, const int person, const ScaleMode heatMapScaleMode,
                                 const float* heatMapsGpuPtr)
{
    try
    {
        // Per-channel volume and total volume of one person's face heat maps
        const auto channelOffset = heatMaps.getVolume(2, 3);
        const auto volumeBodyParts = FACE_NUMBER_PARTS * channelOffset;
        auto* heatMapsPtr = &heatMaps.getPtr()[person*volumeBodyParts];
        // Copy face parts from GPU to CPU memory
        cudaMemcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float), cudaMemcpyDeviceToHost);
        // Change from [0,1] to [-1,1]
        if (heatMapScaleMode == ScaleMode::PlusMinusOne)
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = fastTruncate(heatMapsPtr[i]) * 2.f - 1.f;
        // [0, 255]
        else if (heatMapScaleMode == ScaleMode::UnsignedChar)
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = (float)intRound(fastTruncate(heatMapsPtr[i]) * 255.f);
        // Avoid values outside original range
        else
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = fastTruncate(heatMapsPtr[i]);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
FaceExtractor::FaceExtractor(const Point<int>& netInputSize, const Point<int>& netOutputSize,
const std::string& modelFolder, const int gpuId,
const std::vector<HeatMapType>& heatMapTypes, const ScaleMode heatMapScale) :
mNetOutputSize{netOutputSize},
spNet{std::make_shared<NetCaffe>(std::array<int,4>{1, 3, mNetOutputSize.y, mNetOutputSize.x},
modelFolder + FACE_PROTOTXT, modelFolder + FACE_TRAINED_MODEL, gpuId)},
spResizeAndMergeCaffe{std::make_shared<ResizeAndMergeCaffe<float>>()},
spMaximumCaffe{std::make_shared<MaximumCaffe<float>>()},
mFaceImageCrop{mNetOutputSize.area()*3},
mHeatMapScaleMode{heatMapScale},
mHeatMapTypes{heatMapTypes}
......@@ -77,153 +34,18 @@ namespace op
}
}
void FaceExtractor::initializationOnThread()
FaceExtractor::~FaceExtractor()
{
try
{
// Logging
log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Get thread id
mThreadId = {std::this_thread::get_id()};
// Caffe net
spNet->initializationOnThread();
spCaffeNetOutputBlob = ((NetCaffe*)spNet.get())->getOutputBlob();
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// HeatMaps extractor blob and layer
spHeatMapsBlob = {std::make_shared<caffe::Blob<float>>(1,1,1,1)};
const bool mergeFirstDimension = true;
spResizeAndMergeCaffe->Reshape({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()},
FACE_CCN_DECREASE_FACTOR, mergeFirstDimension);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// Pose extractor blob and layer
spPeaksBlob = {std::make_shared<caffe::Blob<float>>(1,1,1,1)};
spMaximumCaffe->Reshape({spHeatMapsBlob.get()}, {spPeaksBlob.get()});
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// Logging
log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
void FaceExtractor::forwardPass(const std::vector<Rectangle<float>>& faceRectangles, const cv::Mat& cvInputData,
const float scaleInputToOutput)
void FaceExtractor::initializationOnThread()
{
try
{
if (!faceRectangles.empty())
{
// Security checks
if (cvInputData.empty())
error("Empty cvInputData.", __LINE__, __FUNCTION__, __FILE__);
// Fix parameters
const auto netInputSide = fastMin(mNetOutputSize.x, mNetOutputSize.y);
// Set face size
const auto numberPeople = (int)faceRectangles.size();
mFaceKeypoints.reset({numberPeople, (int)FACE_NUMBER_PARTS, 3}, 0);
// HeatMaps: define size
if (!mHeatMapTypes.empty())
mHeatMaps.reset({numberPeople, (int)FACE_NUMBER_PARTS, mNetOutputSize.y, mNetOutputSize.x});
// // Debugging
// cv::Mat cvInputDataCopy = cvInputData.clone();
// Extract face keypoints for each person
for (auto person = 0 ; person < numberPeople ; person++)
{
const auto& faceRectangle = faceRectangles.at(person);
// Only consider faces with a minimum pixel area
const auto minFaceSize = fastMin(faceRectangle.width, faceRectangle.height);
// // Debugging -> red rectangle
// log(std::to_string(cvInputData.cols) + " " + std::to_string(cvInputData.rows));
// cv::rectangle(cvInputDataCopy,
// cv::Point{(int)faceRectangle.x, (int)faceRectangle.y},
// cv::Point{(int)faceRectangle.bottomRight().x, (int)faceRectangle.bottomRight().y},
// cv::Scalar{0,0,255}, 2);
// Get parts
if (minFaceSize > 40)
{
// // Debugging -> green rectangle overwriting red one
// log(std::to_string(cvInputData.cols) + " " + std::to_string(cvInputData.rows));
// cv::rectangle(cvInputDataCopy,
// cv::Point{(int)faceRectangle.x, (int)faceRectangle.y},
// cv::Point{(int)faceRectangle.bottomRight().x,
// (int)faceRectangle.bottomRight().y},
// cv::Scalar{0,255,0}, 2);
// Resize and shift image to face rectangle positions
const auto faceSize = fastMax(faceRectangle.width, faceRectangle.height);
const double scaleFace = faceSize / (double)netInputSide;
cv::Mat Mscaling = cv::Mat::eye(2, 3, CV_64F);
Mscaling.at<double>(0,0) = scaleFace;
Mscaling.at<double>(1,1) = scaleFace;
Mscaling.at<double>(0,2) = faceRectangle.x;
Mscaling.at<double>(1,2) = faceRectangle.y;
cv::Mat faceImage;
cv::warpAffine(cvInputData, faceImage, Mscaling, cv::Size{mNetOutputSize.x, mNetOutputSize.y},
CV_INTER_LINEAR | CV_WARP_INVERSE_MAP, cv::BORDER_CONSTANT, cv::Scalar(0,0,0));
// cv::Mat -> float*
uCharCvMatToFloatPtr(mFaceImageCrop.getPtr(), faceImage, true);
// // Debugging
// if (person < 5)
// cv::imshow("faceImage" + std::to_string(person), faceImage);
// 1. Caffe deep network
auto* inputDataGpuPtr = spNet->getInputDataGpuPtr();
cudaMemcpy(inputDataGpuPtr, mFaceImageCrop.getPtr(), mNetOutputSize.area() * 3 * sizeof(float),
cudaMemcpyHostToDevice);
spNet->forwardPass();
// 2. Resize heat maps + merge different scales
#ifndef CPU_ONLY
spResizeAndMergeCaffe->Forward_gpu({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()});
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#else
spResizeAndMergeCaffe->Forward_cpu({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()});
#endif
// 3. Get peaks by Non-Maximum Suppression
#ifndef CPU_ONLY
spMaximumCaffe->Forward_gpu({spHeatMapsBlob.get()}, {spPeaksBlob.get()});
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#else
spMaximumCaffe->Forward_cpu({spHeatMapsBlob.get()}, {spPeaksBlob.get()});
#endif
const auto* facePeaksPtr = spPeaksBlob->mutable_cpu_data();
for (auto part = 0 ; part < mFaceKeypoints.getSize(1) ; part++)
{
const auto xyIndex = part * mFaceKeypoints.getSize(2);
const auto x = facePeaksPtr[xyIndex];
const auto y = facePeaksPtr[xyIndex + 1];
const auto score = facePeaksPtr[xyIndex + 2];
const auto baseIndex = mFaceKeypoints.getSize(2)
* (part + person * mFaceKeypoints.getSize(1));
mFaceKeypoints[baseIndex] = (float)(scaleInputToOutput * (Mscaling.at<double>(0,0) * x
+ Mscaling.at<double>(0,1) * y
+ Mscaling.at<double>(0,2)));
mFaceKeypoints[baseIndex+1] = (float)(scaleInputToOutput * (Mscaling.at<double>(1,0) * x
+ Mscaling.at<double>(1,1) * y
+ Mscaling.at<double>(1,2)));
mFaceKeypoints[baseIndex+2] = score;
}
// HeatMaps: storing
if (!mHeatMapTypes.empty())
updateFaceHeatMapsForPerson(mHeatMaps, person, mHeatMapScaleMode,
spHeatMapsBlob->gpu_data());
}
}
// // Debugging
// cv::imshow("AcvInputDataCopy", cvInputDataCopy);
}
else
mFaceKeypoints.reset();
// Get thread id
mThreadId = {std::this_thread::get_id()};
// Deep net initialization
netInitializationOnThread();
}
catch (const std::exception& e)
{
......
#if defined USE_CAFFE && defined USE_CUDA
#include <caffe/blob.hpp>
#endif
#include <opencv2/opencv.hpp> // CV_WARP_INVERSE_MAP, CV_INTER_LINEAR
#include <openpose/core/maximumCaffe.hpp>
#include <openpose/core/netCaffe.hpp>
#include <openpose/core/resizeAndMergeCaffe.hpp>
#include <openpose/face/faceParameters.hpp>
#include <openpose/utilities/cuda.hpp>
#include <openpose/utilities/fastMath.hpp>
#include <openpose/utilities/openCv.hpp>
#include <openpose/face/faceExtractorCaffe.hpp>
namespace op
{
// PIMPL struct holding the Caffe/CUDA-only state of FaceExtractorCaffe, so that the public
// header stays free of Caffe/CUDA dependencies. Empty when compiled without USE_CAFFE & USE_CUDA.
struct FaceExtractorCaffe::ImplFaceExtractorCaffe
{
    #if defined USE_CAFFE && defined USE_CUDA
        // Face detection network and the post-processing layers applied to its output
        std::shared_ptr<NetCaffe> spNetCaffe;
        std::shared_ptr<ResizeAndMergeCaffe<float>> spResizeAndMergeCaffe;
        std::shared_ptr<MaximumCaffe<float>> spMaximumCaffe;
        // Init with thread
        // Blobs wired together on the worker thread (see netInitializationOnThread);
        // spCaffeNetOutputBlob is boost::shared_ptr because Caffe's API returns that type.
        boost::shared_ptr<caffe::Blob<float>> spCaffeNetOutputBlob;
        std::shared_ptr<caffe::Blob<float>> spHeatMapsBlob;
        std::shared_ptr<caffe::Blob<float>> spPeaksBlob;

        // Builds the Caffe net (1 x 3 x H x W input) from the face prototxt/caffemodel in
        // `modelFolder`, bound to GPU `gpuId`, plus the resize-merge and maximum layers.
        ImplFaceExtractorCaffe(const Point<int>& netOutputSize,
                               const std::string& modelFolder, const int gpuId) :
            spNetCaffe{std::make_shared<NetCaffe>(std::array<int,4>{1, 3, netOutputSize.y, netOutputSize.x},
                                                  modelFolder + FACE_PROTOTXT, modelFolder + FACE_TRAINED_MODEL,
                                                  gpuId)},
            spResizeAndMergeCaffe{std::make_shared<ResizeAndMergeCaffe<float>>()},
            spMaximumCaffe{std::make_shared<MaximumCaffe<float>>()}
        {
        }
    #endif
};
#if defined USE_CAFFE && defined USE_CUDA
    // Copies the face heat maps for `person` from GPU memory into the CPU-side `heatMaps` array
    // and rescales the values according to `heatMapScaleMode`.
    // @param heatMaps          Destination array; the slice for `person` is overwritten.
    // @param person            Index of the person whose heat maps are copied.
    // @param heatMapScaleMode  PlusMinusOne -> [-1,1]; UnsignedChar -> [0,255]; otherwise clamp to [0,1].
    // @param heatMapsGpuPtr    Source GPU pointer holding FACE_NUMBER_PARTS channels for this person.
    // Fix: removed the dead local `totalOffset`, which was accumulated but never read.
    void updateFaceHeatMapsForPerson(Array<float>& heatMaps, const int person, const ScaleMode heatMapScaleMode,
                                     const float* heatMapsGpuPtr)
    {
        try
        {
            // Per-channel volume and total volume of one person's face heat maps
            const auto channelOffset = heatMaps.getVolume(2, 3);
            const auto volumeBodyParts = FACE_NUMBER_PARTS * channelOffset;
            auto* heatMapsPtr = &heatMaps.getPtr()[person*volumeBodyParts];
            // Copy face parts from GPU to CPU memory
            cudaMemcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float), cudaMemcpyDeviceToHost);
            // Change from [0,1] to [-1,1]
            if (heatMapScaleMode == ScaleMode::PlusMinusOne)
                for (auto i = 0u ; i < volumeBodyParts ; i++)
                    heatMapsPtr[i] = fastTruncate(heatMapsPtr[i]) * 2.f - 1.f;
            // [0, 255]
            else if (heatMapScaleMode == ScaleMode::UnsignedChar)
                for (auto i = 0u ; i < volumeBodyParts ; i++)
                    heatMapsPtr[i] = (float)intRound(fastTruncate(heatMapsPtr[i]) * 255.f);
            // Avoid values outside original range
            else
                for (auto i = 0u ; i < volumeBodyParts ; i++)
                    heatMapsPtr[i] = fastTruncate(heatMapsPtr[i]);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }
#endif
// Constructor: forwards the common configuration to the FaceExtractor base and, when Caffe+CUDA
// are available, allocates the PIMPL holding the Caffe net and post-processing layers.
// When compiled without USE_CAFFE & USE_CUDA, construction fails with an explanatory error.
// @param netInputSize  Net input resolution (forwarded to base).
// @param netOutputSize Net output resolution; also used to size the Caffe net input.
// @param modelFolder   Folder containing the face prototxt and caffemodel files.
// @param gpuId         GPU device the net is bound to.
// @param heatMapTypes  Heat maps to keep per person (empty -> none stored).
// @param heatMapScale  Output range for stored heat maps (see updateFaceHeatMapsForPerson).
FaceExtractorCaffe::FaceExtractorCaffe(const Point<int>& netInputSize, const Point<int>& netOutputSize,
                                       const std::string& modelFolder, const int gpuId,
                                       const std::vector<HeatMapType>& heatMapTypes,
                                       const ScaleMode heatMapScale) :
    FaceExtractor{netInputSize, netOutputSize, heatMapTypes, heatMapScale}
    // upImpl only exists in Caffe+CUDA builds; mNetOutputSize is set by the base ctor above
    #if defined USE_CAFFE && defined USE_CUDA
        , upImpl{new ImplFaceExtractorCaffe{mNetOutputSize, modelFolder, gpuId}}
    #endif
{
    try
    {
        #if !defined USE_CAFFE || !defined USE_CUDA
            // Silence unused-parameter warnings in builds without Caffe/CUDA support
            UNUSED(netInputSize);
            UNUSED(netOutputSize);
            UNUSED(modelFolder);
            UNUSED(gpuId);
            UNUSED(heatMapTypes);
            UNUSED(heatMapScale);
            error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
                  " this functionality.", __LINE__, __FUNCTION__, __FILE__);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// Empty out-of-line destructor; presumably required so the smart-pointer PIMPL member (upImpl)
// is destroyed in this translation unit, where ImplFaceExtractorCaffe is complete — TODO confirm
// against the header's declaration of upImpl.
FaceExtractorCaffe::~FaceExtractorCaffe()
{
}
// Per-thread deep-net initialization (called from the worker thread): loads the Caffe net and
// wires the post-processing pipeline: net output blob -> resize/merge -> heat maps blob ->
// maximum layer -> peaks blob. No-op in builds without USE_CAFFE & USE_CUDA.
void FaceExtractorCaffe::netInitializationOnThread()
{
    try
    {
        #if defined USE_CAFFE && defined USE_CUDA
            // Logging
            log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
            // Caffe net
            upImpl->spNetCaffe->initializationOnThread();
            upImpl->spCaffeNetOutputBlob = upImpl->spNetCaffe->getOutputBlob();
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            // HeatMaps extractor blob and layer
            // Blob starts at minimal size (1,1,1,1); Reshape resizes it from the net output shape
            upImpl->spHeatMapsBlob = {std::make_shared<caffe::Blob<float>>(1,1,1,1)};
            const bool mergeFirstDimension = true;
            upImpl->spResizeAndMergeCaffe->Reshape({upImpl->spCaffeNetOutputBlob.get()},
                                                   {upImpl->spHeatMapsBlob.get()},
                                                   FACE_CCN_DECREASE_FACTOR, mergeFirstDimension);
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            // Pose extractor blob and layer
            upImpl->spPeaksBlob = {std::make_shared<caffe::Blob<float>>(1,1,1,1)};
            upImpl->spMaximumCaffe->Reshape({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            // Logging
            log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// Extracts face keypoints for every detected face rectangle in the input image.
// Pipeline per person: warp the face region to the net input size -> Caffe forward pass ->
// resize/merge heat maps -> maximum layer (peak extraction) -> map peaks back to output-image
// coordinates via the inverse warp and scaleInputToOutput. Results go to mFaceKeypoints
// (numberPeople x FACE_NUMBER_PARTS x 3: x, y, score) and, if requested, mHeatMaps.
// @param faceRectangles   One rectangle per person; assumed in input-image coordinates — TODO confirm.
// @param cvInputData      Input image (must be non-empty if faceRectangles is not).
// @param scaleInputToOutput Scale applied to map keypoints from input to output resolution.
// Requires compilation with USE_CAFFE & USE_CUDA; otherwise this function is a no-op.
void FaceExtractorCaffe::forwardPass(const std::vector<Rectangle<float>>& faceRectangles,
                                     const cv::Mat& cvInputData,
                                     const double scaleInputToOutput)
{
    try
    {
        #if defined USE_CAFFE && defined USE_CUDA
            if (!faceRectangles.empty())
            {
                // Security checks
                if (cvInputData.empty())
                    error("Empty cvInputData.", __LINE__, __FUNCTION__, __FILE__);
                // Fix parameters
                const auto netInputSide = fastMin(mNetOutputSize.x, mNetOutputSize.y);
                // Set face size
                const auto numberPeople = (int)faceRectangles.size();
                mFaceKeypoints.reset({numberPeople, (int)FACE_NUMBER_PARTS, 3}, 0);
                // HeatMaps: define size
                if (!mHeatMapTypes.empty())
                    mHeatMaps.reset({numberPeople, (int)FACE_NUMBER_PARTS, mNetOutputSize.y, mNetOutputSize.x});
                // // Debugging
                // cv::Mat cvInputDataCopy = cvInputData.clone();
                // Extract face keypoints for each person
                for (auto person = 0 ; person < numberPeople ; person++)
                {
                    const auto& faceRectangle = faceRectangles.at(person);
                    // Only consider faces with a minimum pixel area
                    const auto minFaceSize = fastMin(faceRectangle.width, faceRectangle.height);
                    // // Debugging -> red rectangle
                    // log(std::to_string(cvInputData.cols) + " " + std::to_string(cvInputData.rows));
                    // cv::rectangle(cvInputDataCopy,
                    //               cv::Point{(int)faceRectangle.x, (int)faceRectangle.y},
                    //               cv::Point{(int)faceRectangle.bottomRight().x,
                    //                         (int)faceRectangle.bottomRight().y},
                    //               cv::Scalar{0,0,255}, 2);
                    // Get parts
                    // Faces whose shorter rectangle side is <= 40 px are skipped (too small to be reliable)
                    if (minFaceSize > 40)
                    {
                        // // Debugging -> green rectangle overwriting red one
                        // log(std::to_string(cvInputData.cols) + " " + std::to_string(cvInputData.rows));
                        // cv::rectangle(cvInputDataCopy,
                        //               cv::Point{(int)faceRectangle.x, (int)faceRectangle.y},
                        //               cv::Point{(int)faceRectangle.bottomRight().x,
                        //                         (int)faceRectangle.bottomRight().y},
                        //               cv::Scalar{0,255,0}, 2);
                        // Resize and shift image to face rectangle positions
                        // Affine matrix Mscaling maps net-input coordinates back to input-image
                        // coordinates; warpAffine is called with CV_WARP_INVERSE_MAP, so it crops
                        // and scales the face region into a mNetOutputSize image.
                        const auto faceSize = fastMax(faceRectangle.width, faceRectangle.height);
                        const double scaleFace = faceSize / (double)netInputSide;
                        cv::Mat Mscaling = cv::Mat::eye(2, 3, CV_64F);
                        Mscaling.at<double>(0,0) = scaleFace;
                        Mscaling.at<double>(1,1) = scaleFace;
                        Mscaling.at<double>(0,2) = faceRectangle.x;
                        Mscaling.at<double>(1,2) = faceRectangle.y;

                        cv::Mat faceImage;
                        cv::warpAffine(cvInputData, faceImage, Mscaling,
                                       cv::Size{mNetOutputSize.x, mNetOutputSize.y},
                                       CV_INTER_LINEAR | CV_WARP_INVERSE_MAP,
                                       cv::BORDER_CONSTANT, cv::Scalar(0,0,0));

                        // cv::Mat -> float*
                        uCharCvMatToFloatPtr(mFaceImageCrop.getPtr(), faceImage, true);

                        // // Debugging
                        // if (person < 5)
                        // cv::imshow("faceImage" + std::to_string(person), faceImage);

                        // 1. Caffe deep network
                        // Upload the prepared crop to the net's GPU input buffer and run inference
                        auto* inputDataGpuPtr = upImpl->spNetCaffe->getInputDataGpuPtr();
                        cudaMemcpy(inputDataGpuPtr, mFaceImageCrop.getPtr(),
                                   mNetOutputSize.area() * 3 * sizeof(float),
                                   cudaMemcpyHostToDevice);
                        upImpl->spNetCaffe->forwardPass();

                        // 2. Resize heat maps + merge different scales
                        #ifdef USE_CUDA
                            upImpl->spResizeAndMergeCaffe->Forward_gpu({upImpl->spCaffeNetOutputBlob.get()},
                                                                       {upImpl->spHeatMapsBlob.get()});
                            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
                        #else
                            upImpl->spResizeAndMergeCaffe->Forward_cpu({upImpl->spCaffeNetOutputBlob.get()},
                                                                       {upImpl->spHeatMapsBlob.get()});
                        #endif

                        // 3. Get peaks by Non-Maximum Suppression
                        #ifdef USE_CUDA
                            upImpl->spMaximumCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get()},
                                                                {upImpl->spPeaksBlob.get()});
                            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
                        #else
                            upImpl->spMaximumCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()},
                                                                {upImpl->spPeaksBlob.get()});
                        #endif

                        // Map each peak (net coordinates) to output-image coordinates via Mscaling
                        // and scaleInputToOutput; score is copied through unchanged
                        const auto* facePeaksPtr = upImpl->spPeaksBlob->mutable_cpu_data();
                        for (auto part = 0 ; part < mFaceKeypoints.getSize(1) ; part++)
                        {
                            const auto xyIndex = part * mFaceKeypoints.getSize(2);
                            const auto x = facePeaksPtr[xyIndex];
                            const auto y = facePeaksPtr[xyIndex + 1];
                            const auto score = facePeaksPtr[xyIndex + 2];
                            const auto baseIndex = mFaceKeypoints.getSize(2)
                                                 * (part + person * mFaceKeypoints.getSize(1));
                            mFaceKeypoints[baseIndex] = (float)(scaleInputToOutput
                                                                * (Mscaling.at<double>(0,0) * x
                                                                   + Mscaling.at<double>(0,1) * y
                                                                   + Mscaling.at<double>(0,2)));
                            mFaceKeypoints[baseIndex+1] = (float)(scaleInputToOutput
                                                                  * (Mscaling.at<double>(1,0) * x
                                                                     + Mscaling.at<double>(1,1) * y
                                                                     + Mscaling.at<double>(1,2)));
                            mFaceKeypoints[baseIndex+2] = score;
                        }
                        // HeatMaps: storing
                        if (!mHeatMapTypes.empty())
                            updateFaceHeatMapsForPerson(mHeatMaps, person, mHeatMapScaleMode,
                                                        upImpl->spHeatMapsBlob->gpu_data());
                    }
                }
                // // Debugging
                // cv::imshow("AcvInputDataCopy", cvInputDataCopy);
            }
            else
                mFaceKeypoints.reset();
        #else
            UNUSED(faceRectangles);
            UNUSED(cvInputData);
            UNUSED(scaleInputToOutput);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
}
#ifndef CPU_ONLY
#ifdef USE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
......@@ -19,7 +19,7 @@ namespace op
try
{
// Free CUDA pointers - Note that if pointers are 0 (i.e. nullptr), no operation is performed.
#ifndef CPU_ONLY
#ifdef USE_CUDA
cudaFree(pGpuFace);
#endif
}
......@@ -35,7 +35,7 @@ namespace op
{
log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// GPU memory allocation for rendering
#ifndef CPU_ONLY
#ifdef USE_CUDA
cudaMalloc((void**)(&pGpuFace), POSE_MAX_PEOPLE * FACE_NUMBER_PARTS * 3 * sizeof(float));
#endif
log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
......@@ -54,7 +54,7 @@ namespace op
if (outputData.empty())
error("Empty Array<float> outputData.", __LINE__, __FUNCTION__, __FILE__);
// GPU rendering
#ifndef CPU_ONLY
#ifdef USE_CUDA
const auto elementRendered = spElementToRender->load(); // I prefer std::round(T&) over intRound(T) for std::atomic
const auto numberPeople = faceKeypoints.getSize(0);
const Point<int> frameSize{outputData.getSize(2), outputData.getSize(1)};
......@@ -73,11 +73,11 @@ namespace op
// GPU memory to CPU if last renderer
gpuToCpuMemoryIfLastRenderer(outputData.getPtr(), outputData.getVolume());
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// CPU_ONLY mode
#else
error("GPU rendering not available if `CPU_ONLY` is set.", __LINE__, __FUNCTION__, __FILE__);
UNUSED(outputData);
UNUSED(faceKeypoints);
error("OpenPose must be compiled with the `USE_CUDA` macro definitions in order to run this"
" functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
......
......@@ -6,13 +6,12 @@
namespace op
{
const dim3 THREADS_PER_BLOCK{128, 128, 1};
__constant__ const unsigned int PART_PAIRS_GPU[] = FACE_PAIRS_RENDER_GPU;
__constant__ const float COLORS[] = {FACE_COLORS_RENDER_GPU};
__global__ void renderFaceParts(float* targetPtr, const int targetWidth, const int targetHeight,
const float* const facePtr, const int numberPeople, const float threshold,
const float alphaColorToAdd)
const float* const facePtr, const int numberPeople,
const float threshold, const float alphaColorToAdd)
{
const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;
......@@ -43,10 +42,11 @@ namespace op
{
if (numberPeople > 0)
{
const auto numBlocks = getNumberCudaBlocks(frameSize, THREADS_PER_BLOCK);
renderFaceParts<<<THREADS_PER_BLOCK, numBlocks>>>(framePtr, frameSize.x, frameSize.y, facePtr,
numberPeople, renderThreshold,
alphaColorToAdd);
dim3 threadsPerBlock;
dim3 numBlocks;
getNumberCudaThreadsAndBlocks(threadsPerBlock, numBlocks, frameSize);
renderFaceParts<<<threadsPerBlock, numBlocks>>>(framePtr, frameSize.x, frameSize.y, facePtr,
numberPeople, renderThreshold, alphaColorToAdd);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
}
}
......
......@@ -58,7 +58,7 @@ namespace op
{
}
void GuiInfoAdder::addInfo(cv::Mat& cvOutputData, const Array<float>& poseKeypoints, const unsigned long long id,
void GuiInfoAdder::addInfo(cv::Mat& cvOutputData, const int numberPeople, const unsigned long long id,
const std::string& elementRenderedName)
{
try
......@@ -101,8 +101,9 @@ namespace op
putTextOnCvMat(cvOutputData, "Frame: " + std::to_string(id),
{borderMargin, (int)(cvOutputData.rows - borderMargin)}, white, false);
// Number people
putTextOnCvMat(cvOutputData, "People: " + std::to_string(poseKeypoints.getSize(0)),
{(int)(cvOutputData.cols - borderMargin), (int)(cvOutputData.rows - borderMargin)}, white, true);
putTextOnCvMat(cvOutputData, "People: " + std::to_string(numberPeople),
{(int)(cvOutputData.cols - borderMargin), (int)(cvOutputData.rows - borderMargin)},
white, true);
}
catch (const std::exception& e)
{
......
......@@ -3,6 +3,7 @@ set(SOURCES
handDetector.cpp
handDetectorFromTxt.cpp
handExtractor.cpp
handExtractorCaffe.cpp
handCpuRenderer.cpp
handGpuRenderer.cpp
renderHand.cpp
......
......@@ -130,7 +130,7 @@ namespace op
{
}
std::vector<std::array<Rectangle<float>, 2>> HandDetector::detectHands(const Array<float>& poseKeypoints, const float scaleInputToOutput) const
std::vector<std::array<Rectangle<float>, 2>> HandDetector::detectHands(const Array<float>& poseKeypoints, const double scaleInputToOutput) const
{
try
{
......@@ -148,8 +148,8 @@ namespace op
mPoseIndexes[(int)PosePart::LShoulder], mPoseIndexes[(int)PosePart::RWrist],
mPoseIndexes[(int)PosePart::RElbow], mPoseIndexes[(int)PosePart::RShoulder], threshold
);
handRectangles.at(person).at(0) /= scaleInputToOutput;
handRectangles.at(person).at(1) /= scaleInputToOutput;
handRectangles.at(person).at(0) /= (float) scaleInputToOutput;
handRectangles.at(person).at(1) /= (float) scaleInputToOutput;
}
}
return handRectangles;
......@@ -161,7 +161,7 @@ namespace op
}
}
std::vector<std::array<Rectangle<float>, 2>> HandDetector::trackHands(const Array<float>& poseKeypoints, const float scaleInputToOutput)
std::vector<std::array<Rectangle<float>, 2>> HandDetector::trackHands(const Array<float>& poseKeypoints, const double scaleInputToOutput)
{
try
{
......
#include <limits> // std::numeric_limits
#include <opencv2/opencv.hpp> // CV_WARP_INVERSE_MAP, CV_INTER_LINEAR
#include <openpose/core/netCaffe.hpp>
#include <openpose/hand/handParameters.hpp>
#include <openpose/utilities/check.hpp>
#include <openpose/utilities/cuda.hpp>
#include <openpose/utilities/fastMath.hpp>
#include <openpose/utilities/keypoint.hpp>
#include <openpose/utilities/openCv.hpp>
#include <openpose/hand/handExtractor.hpp>
namespace op
{
// Warps the input frame into the hand-network input crop and converts it to normalized floats.
// The 2x3 affine transform used for cropping is returned through `affineMatrix` so that the
// keypoints detected on the crop can later be mapped back into original-image coordinates.
void cropFrame(Array<float>& handImageCrop, cv::Mat& affineMatrix, const cv::Mat& cvInputData,
               const Rectangle<float>& handRectangle, const int netInputSide,
               const Point<int>& netOutputSize, const bool mirrorImage)
{
    try
    {
        // Scale from the net input side to the hand rectangle size
        const auto scale = handRectangle.width / (float)netInputSide;
        affineMatrix = cv::Mat::eye(2,3,CV_64F);
        // Mirroring negates the horizontal scale and shifts the origin by the rectangle width
        affineMatrix.at<double>(0,0) = (mirrorImage ? -scale : scale);
        affineMatrix.at<double>(1,1) = scale;
        affineMatrix.at<double>(0,2) = (mirrorImage ? handRectangle.x + handRectangle.width
                                                    : handRectangle.x);
        affineMatrix.at<double>(1,2) = handRectangle.y;
        // Crop the frame (inverse mapping, linear interpolation, black padding)
        cv::Mat handImage;
        cv::warpAffine(cvInputData, handImage, affineMatrix, cv::Size{netOutputSize.x, netOutputSize.y},
                       CV_INTER_LINEAR | CV_WARP_INVERSE_MAP, cv::BORDER_CONSTANT, cv::Scalar{0,0,0});
        // cv::Mat -> float*
        uCharCvMatToFloatPtr(handImageCrop.getPtr(), handImage, true);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// Maps the peaks detected on the cropped hand image back to output-frame coordinates and
// stores them as (x, y, score) triplets per part into `handCurrent` for the given person.
void connectKeypoints(Array<float>& handCurrent, const float scaleInputToOutput, const int person,
                      const cv::Mat& affineMatrix, const float* handPeaks)
{
    try
    {
        const auto numberParts = handCurrent.getSize(1);
        const auto xyzChannels = handCurrent.getSize(2);
        for (auto part = 0 ; part < numberParts ; part++)
        {
            // Peak triplet for this part: (x, y, score) on the cropped image
            const auto* peak = &handPeaks[part * xyzChannels];
            const auto x = peak[0];
            const auto y = peak[1];
            const auto score = peak[2];
            // Undo the crop affine transform, then rescale from net input to output resolution
            const auto baseIndex = xyzChannels * (person * numberParts + part);
            handCurrent[baseIndex]   = (float)(scaleInputToOutput
                                               * (affineMatrix.at<double>(0,0)*x
                                                  + affineMatrix.at<double>(0,1)*y
                                                  + affineMatrix.at<double>(0,2)));
            handCurrent[baseIndex+1] = (float)(scaleInputToOutput
                                               * (affineMatrix.at<double>(1,0)*x
                                                  + affineMatrix.at<double>(1,1)*y
                                                  + affineMatrix.at<double>(1,2)));
            handCurrent[baseIndex+2] = score;
        }
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
// Builds a squared hand bounding box from the detected keypoints. The box is re-centered on
// the keypoints and enlarged by `increaseRatio`; when a valid rectangle from the previous
// frame is provided, the side length is smoothed against 85% of the previous one instead.
Rectangle<float> getHandRectangle(Array<float>& handCurrent, const int person, const float increaseRatio,
                                  const int handNumberParts, const float thresholdRectangle,
                                  const Rectangle<float>& previousHandRectangle = Rectangle<float>{})
{
    try
    {
        // Tight box around the keypoints scoring above thresholdRectangle
        auto handRectangle = getKeypointsRectangle(handCurrent, person, handNumberParts, thresholdRectangle);
        // Side of the square: largest dimension scaled up by increaseRatio
        auto sideLength = increaseRatio * fastMax(handRectangle.width, handRectangle.height);
        // Temporal smoothing w.r.t. the previous frame rectangle (if any)
        const auto previousIsValid = (previousHandRectangle.width > 0 && previousHandRectangle.height > 0);
        if (previousIsValid)
            sideLength = fastMax(handRectangle.width,
                                 0.85f * fastMax(previousHandRectangle.width, previousHandRectangle.height));
        // Re-center into a square of that side
        handRectangle.recenter(sideLength, sideLength);
        return handRectangle;
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        return Rectangle<float>{};
    }
}
// Copies this person's hand heat maps from GPU memory into the CPU `heatMaps` array and
// rescales the values according to `heatMapScaleMode`.
// @param heatMaps Destination array; this person's slice starts at person * HAND_NUMBER_PARTS
//        * heatMaps.getVolume(2, 3).
// @param person Person index whose slice of heatMaps is written.
// @param heatMapScaleMode Target range: PlusMinusOne -> [-1,1], UnsignedChar -> rounded
//        [0,255], otherwise values are only truncated (fastTruncate).
// @param heatMapsGpuPtr Device pointer holding HAND_NUMBER_PARTS heat maps for this person.
void updateHandHeatMapsForPerson(Array<float>& heatMaps, const int person, const ScaleMode heatMapScaleMode,
                                 const float* heatMapsGpuPtr)
{
    try
    {
        // Size of one person's slice (HAND_NUMBER_PARTS maps of width*height each)
        const auto channelOffset = heatMaps.getVolume(2, 3);
        const auto volumeBodyParts = HAND_NUMBER_PARTS * channelOffset;
        auto* heatMapsPtr = &heatMaps.getPtr()[person*volumeBodyParts];
        // Copy hand parts from device to host
        // (fix: removed the dead `totalOffset` accumulator the old code updated but never read)
        cudaMemcpy(heatMapsPtr, heatMapsGpuPtr, volumeBodyParts * sizeof(float), cudaMemcpyDeviceToHost);
        // Change from [0,1] to [-1,1]
        if (heatMapScaleMode == ScaleMode::PlusMinusOne)
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = fastTruncate(heatMapsPtr[i]) * 2.f - 1.f;
        // [0, 255]
        else if (heatMapScaleMode == ScaleMode::UnsignedChar)
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = (float)intRound(fastTruncate(heatMapsPtr[i]) * 255.f);
        // Avoid values outside original range
        else
            for (auto i = 0u ; i < volumeBodyParts ; i++)
                heatMapsPtr[i] = fastTruncate(heatMapsPtr[i]);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
HandExtractor::HandExtractor(const Point<int>& netInputSize, const Point<int>& netOutputSize,
const std::string& modelFolder, const int gpuId, const unsigned short numberScales,
const float rangeScales, const std::vector<HeatMapType>& heatMapTypes,
const ScaleMode heatMapScale) :
const unsigned short numberScales, const float rangeScales,
const std::vector<HeatMapType>& heatMapTypes, const ScaleMode heatMapScale) :
mMultiScaleNumberAndRange{std::make_pair(numberScales, rangeScales)},
mNetOutputSize{netOutputSize},
spNet{std::make_shared<NetCaffe>(std::array<int,4>{1, 3, mNetOutputSize.y, mNetOutputSize.x},
modelFolder + HAND_PROTOTXT, modelFolder + HAND_TRAINED_MODEL, gpuId)},
spResizeAndMergeCaffe{std::make_shared<ResizeAndMergeCaffe<float>>()},
spMaximumCaffe{std::make_shared<MaximumCaffe<float>>()},
mHandImageCrop{mNetOutputSize.area()*3},
mHeatMapScaleMode{heatMapScale},
mHeatMapTypes{heatMapTypes}
......@@ -165,161 +36,18 @@ namespace op
}
}
void HandExtractor::initializationOnThread()
HandExtractor::~HandExtractor()
{
try
{
// Logging
log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// Get thread id
mThreadId = {std::this_thread::get_id()};
// Caffe net
spNet->initializationOnThread();
spCaffeNetOutputBlob = ((NetCaffe*)spNet.get())->getOutputBlob();
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// HeatMaps extractor blob and layer
spHeatMapsBlob = {std::make_shared<caffe::Blob<float>>(1,1,1,1)};
const bool mergeFirstDimension = true;
spResizeAndMergeCaffe->Reshape({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()},
HAND_CCN_DECREASE_FACTOR, mergeFirstDimension);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// Pose extractor blob and layer
spPeaksBlob = {std::make_shared<caffe::Blob<float>>(1,1,1,1)};
spMaximumCaffe->Reshape({spHeatMapsBlob.get()}, {spPeaksBlob.get()});
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// Logging
log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
void HandExtractor::forwardPass(const std::vector<std::array<Rectangle<float>, 2>> handRectangles,
const cv::Mat& cvInputData,
const float scaleInputToOutput)
void HandExtractor::initializationOnThread()
{
try
{
if (!handRectangles.empty())
{
// Security checks
if (cvInputData.empty())
error("Empty cvInputData.", __LINE__, __FUNCTION__, __FILE__);
// Fix parameters
const auto netInputSide = fastMin(mNetOutputSize.x, mNetOutputSize.y);
// Set hand size
const auto numberPeople = (int)handRectangles.size();
mHandKeypoints[0].reset({numberPeople, (int)HAND_NUMBER_PARTS, 3}, 0);
mHandKeypoints[1].reset(mHandKeypoints[0].getSize(), 0);
// HeatMaps: define size
if (!mHeatMapTypes.empty())
{
mHeatMaps[0].reset({numberPeople, (int)HAND_NUMBER_PARTS, mNetOutputSize.y, mNetOutputSize.x});
mHeatMaps[1].reset({numberPeople, (int)HAND_NUMBER_PARTS, mNetOutputSize.y, mNetOutputSize.x});
}
// // Debugging
// cv::Mat cvInputDataCopied = cvInputData.clone();
// Extract hand keypoints for each person
for (auto hand = 0 ; hand < 2 ; hand++)
{
// Parameters
auto& handCurrent = mHandKeypoints[hand];
const bool mirrorImage = (hand == 0);
for (auto person = 0 ; person < numberPeople ; person++)
{
const auto& handRectangle = handRectangles.at(person).at(hand);
// Only consider faces with a minimum pixel area
const auto minHandSize = fastMin(handRectangle.width, handRectangle.height);
// // Debugging -> red rectangle
// if (handRectangle.width > 0)
// cv::rectangle(cvInputDataCopied,
// cv::Point{intRound(handRectangle.x), intRound(handRectangle.y)},
// cv::Point{intRound(handRectangle.x + handRectangle.width),
// intRound(handRectangle.y + handRectangle.height)},
// cv::Scalar{(hand * 255.f),0.f,255.f}, 2);
// Get parts
if (minHandSize > 1 && handRectangle.area() > 10)
{
// Single-scale detection
if (mMultiScaleNumberAndRange.first == 1)
{
// // Debugging -> green rectangle overwriting red one
// if (handRectangle.width > 0)
// cv::rectangle(cvInputDataCopied,
// cv::Point{intRound(handRectangle.x), intRound(handRectangle.y)},
// cv::Point{intRound(handRectangle.x + handRectangle.width),
// intRound(handRectangle.y + handRectangle.height)},
// cv::Scalar{(hand * 255.f),255.f,0.f}, 2);
// Parameters
cv::Mat affineMatrix;
// Resize image to hands positions + cv::Mat -> float*
cropFrame(mHandImageCrop, affineMatrix, cvInputData, handRectangle, netInputSide,
mNetOutputSize, mirrorImage);
// Deep net + Estimate keypoint locations
detectHandKeypoints(handCurrent, scaleInputToOutput, person, affineMatrix);
}
// Multi-scale detection
else
{
const auto handPtrArea = handCurrent.getSize(1) * handCurrent.getSize(2);
auto* handCurrentPtr = handCurrent.getPtr() + person * handPtrArea;
const auto numberScales = mMultiScaleNumberAndRange.first;
const auto initScale = 1.f - mMultiScaleNumberAndRange.second / 2.f;
for (auto i = 0 ; i < numberScales ; i++)
{
// Get current scale
const auto scale = initScale
+ mMultiScaleNumberAndRange.second * i / (numberScales-1.f);
// Process hand
Array<float> handEstimated({1, handCurrent.getSize(1), handCurrent.getSize(2)}, 0);
const auto handRectangleScale = recenter(
handRectangle,
(float)(intRound(handRectangle.width * scale) / 2 * 2),
(float)(intRound(handRectangle.height * scale) / 2 * 2)
);
// // Debugging -> blue rectangle
// cv::rectangle(cvInputDataCopied,
// cv::Point{intRound(handRectangleScale.x),
// intRound(handRectangleScale.y)},
// cv::Point{intRound(handRectangleScale.x
// + handRectangleScale.width),
// intRound(handRectangleScale.y
// + handRectangleScale.height)},
// cv::Scalar{255,0,0}, 2);
// Parameters
cv::Mat affineMatrix;
// Resize image to hands positions + cv::Mat -> float*
cropFrame(mHandImageCrop, affineMatrix, cvInputData, handRectangleScale,
netInputSide, mNetOutputSize, mirrorImage);
// Deep net + Estimate keypoint locations
detectHandKeypoints(handEstimated, scaleInputToOutput, 0, affineMatrix);
if (i == 0
|| getAverageScore(handEstimated, 0) > getAverageScore(handCurrent, person))
std::copy(handEstimated.getConstPtr(),
handEstimated.getConstPtr() + handPtrArea, handCurrentPtr);
}
}
// HeatMaps: storing
if (!mHeatMapTypes.empty())
updateHandHeatMapsForPerson(mHeatMaps[hand], person, mHeatMapScaleMode,
spHeatMapsBlob->gpu_data());
}
}
}
// // Debugging
// cv::imshow("cvInputDataCopied", cvInputDataCopied);
}
else
{
mHandKeypoints[0].reset();
mHandKeypoints[1].reset();
}
// Get thread id
mThreadId = {std::this_thread::get_id()};
// Deep net initialization
netInitializationOnThread();
}
catch (const std::exception& e)
{
......@@ -368,41 +96,4 @@ namespace op
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
// Runs the Caffe hand network on the current crop (mHandImageCrop) and writes the detected
// keypoints for `person` into `handCurrent`, mapped back to output-frame coordinates via
// `affineMatrix` and `scaleInputToOutput`.
// Fix: the GPU/CPU branches were guarded with `#ifndef CPU_ONLY`, inconsistent with the
// `#ifdef USE_CUDA` guards used everywhere else in this code base; unified on USE_CUDA.
void HandExtractor::detectHandKeypoints(Array<float>& handCurrent, const float scaleInputToOutput,
                                        const int person, const cv::Mat& affineMatrix)
{
    try
    {
        // Deep net
        // 1. Caffe deep network: upload crop to device and run forward pass
        auto* inputDataGpuPtr = spNet->getInputDataGpuPtr();
        cudaMemcpy(inputDataGpuPtr, mHandImageCrop.getConstPtr(), mNetOutputSize.area() * 3 * sizeof(float),
                   cudaMemcpyHostToDevice);
        spNet->forwardPass();
        // 2. Resize heat maps + merge different scales
        #ifdef USE_CUDA
            spResizeAndMergeCaffe->Forward_gpu({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()});
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
        #else
            spResizeAndMergeCaffe->Forward_cpu({spCaffeNetOutputBlob.get()}, {spHeatMapsBlob.get()});
        #endif
        // 3. Get peaks by Non-Maximum Suppression
        #ifdef USE_CUDA
            spMaximumCaffe->Forward_gpu({spHeatMapsBlob.get()}, {spPeaksBlob.get()});
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
        #else
            spMaximumCaffe->Forward_cpu({spHeatMapsBlob.get()}, {spPeaksBlob.get()});
        #endif
        // Estimate keypoint locations
        connectKeypoints(handCurrent, scaleInputToOutput, person, affineMatrix, spPeaksBlob->mutable_cpu_data());
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
}
此差异已折叠。
#ifndef CPU_ONLY
#ifdef USE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
......@@ -19,7 +19,7 @@ namespace op
try
{
// Free CUDA pointers - Note that if pointers are 0 (i.e. nullptr), no operation is performed.
#ifndef CPU_ONLY
#ifdef USE_CUDA
cudaFree(pGpuHand);
#endif
}
......@@ -35,7 +35,7 @@ namespace op
{
log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
// GPU memory allocation for rendering
#ifndef CPU_ONLY
#ifdef USE_CUDA
cudaMalloc((void**)(&pGpuHand), HAND_MAX_HANDS * HAND_NUMBER_PARTS * 3 * sizeof(float));
#endif
log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
......@@ -56,7 +56,7 @@ namespace op
if (handKeypoints[0].getSize(0) != handKeypoints[1].getSize(0))
error("Wrong hand format: handKeypoints.getSize(0) != handKeypoints.getSize(1).", __LINE__, __FUNCTION__, __FILE__);
// GPU rendering
#ifndef CPU_ONLY
#ifdef USE_CUDA
const auto elementRendered = spElementToRender->load(); // I prefer std::round(T&) over intRound(T) for std::atomic
const auto numberPeople = handKeypoints[0].getSize(0);
const Point<int> frameSize{outputData.getSize(2), outputData.getSize(1)};
......@@ -76,11 +76,11 @@ namespace op
// GPU memory to CPU if last renderer
gpuToCpuMemoryIfLastRenderer(outputData.getPtr(), outputData.getVolume());
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
// CPU_ONLY mode
#else
error("GPU rendering not available if `CPU_ONLY` is set.", __LINE__, __FUNCTION__, __FILE__);
UNUSED(outputData);
UNUSED(handKeypoints);
error("OpenPose must be compiled with the `USE_CUDA` macro definitions in order to run this"
" functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
}
catch (const std::exception& e)
......
......@@ -9,8 +9,6 @@ namespace op
__constant__ const unsigned int PART_PAIRS_GPU[] = HAND_PAIRS_RENDER_GPU;
__constant__ const float COLORS[] = {HAND_COLORS_RENDER_GPU};
__global__ void renderHandsParts(float* targetPtr, const int targetWidth, const int targetHeight,
const float* const handsPtr, const int numberHands,
const float threshold, const float alphaColorToAdd)
......@@ -46,7 +44,7 @@ namespace op
{
dim3 threadsPerBlock;
dim3 numBlocks;
std::tie(threadsPerBlock, numBlocks) = getNumberCudaThreadsAndBlocks(frameSize);
getNumberCudaThreadsAndBlocks(threadsPerBlock, numBlocks, frameSize);
renderHandsParts<<<threadsPerBlock, numBlocks>>>(framePtr, frameSize.x, frameSize.y, handsPtr,
numberHands, renderThreshold, alphaColorToAdd);
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册