From 3a3f2d8f8b7e2e480ff748ab091fd06fd1b0f8da Mon Sep 17 00:00:00 2001
From: gineshidalgo99 <gineshidalgo99@gmail.com>
Date: Fri, 22 Sep 2017 10:25:12 -0400
Subject: [PATCH] Cleaner doc & readme

---
 README.md                                     |  4 +-
 doc/demo_overview.md                          |  4 +-
 doc/release_notes.md                          |  2 +-
 examples/openpose/openpose.cpp                |  5 +-
 .../tutorial_pose/1_extract_from_image.cpp    |  5 +-
 .../2_extract_pose_or_heatmat_from_image.cpp  |  5 +-
 .../tutorial_wrapper/1_user_asynchronous.cpp  |  5 +-
 .../tutorial_wrapper/2_user_synchronous.cpp   |  5 +-
 examples_beta/openpose3d/openpose3d.cpp       |  5 +-
 src/openpose/core/nmsBase.cu                  | 66 +++++++++++--------
 10 files changed, 63 insertions(+), 43 deletions(-)

diff --git a/README.md b/README.md
index 3a7cd7bb..9b70d461 100644
--- a/README.md
+++ b/README.md
@@ -11,7 +11,7 @@ OpenPose is a **library for real-time multi-person keypoint detection and multi-
 
 
 ## Latest News
-- Jul 2017: **Windows**, New [**portable demo**](doc/installation.md#installation---demo) **and** [**easier library installation**](doc/installation.md#installation---library)!
+- Jul 2017: **Windows**, new [**portable demo**](doc/installation.md#installation---demo) **and** [**easier library installation**](doc/installation.md#installation---library)!
 - Jul 2017: **Hands** released!
 - Jun 2017: **Face** released!
 - May 2017: **Windows** version released!
@@ -72,7 +72,7 @@ The pose estimation work is based on the C++ code from [the ECCV 2016 demo](http
 
 ## Functionality
 - Multi-person 15 or **18-keypoint body pose** estimation and rendering. **Running time invariant to number of people** on the image.
-- Multi-person **2x21-keypoint hand** estimation and rendering. Note: In this initial version, **running time** linearly **depends** on the **number of people** on the image. **Coming soon (in around 1-5 days)!**
+- Multi-person **2x21-keypoint hand** estimation and rendering. Note: In this initial version, **running time** linearly **depends** on the **number of people** on the image.
 - Multi-person **70-keypoint face** estimation and rendering. Note: In this initial version, **running time** linearly **depends** on the **number of people** on the image.
 - Flexible and easy-to-configure **multi-threading** module.
 - Image, video, and webcam reader.
diff --git a/doc/demo_overview.md b/doc/demo_overview.md
index 50f3d18e..613e3fda 100644
--- a/doc/demo_overview.md
+++ b/doc/demo_overview.md
@@ -103,7 +103,7 @@ We enumerate some of the most important flags, check the `Flags Detailed Descrip
 - `--write_images folder_path`: Save processed images on a folder.
 - `--write_keypoint path/`: Output JSON, XML or YML files with the people pose data on a folder.
 - `--process_real_time`: For video, it might skip frames to display at real time.
-- `--disable_blending`: If selected, it will render the results on a black backgroung, not showing the original image. Related: `part_to_show`, `alpha_pose`, and `alpha_pose`.
+- `--disable_blending`: If enabled, it will render the results (keypoint skeletons or heatmaps) on a black background, not showing the original image. Related: `part_to_show`, `alpha_pose`, and `alpha_heatmap`.
 - `--part_to_show`: Prediction channel to visualize.
 - `--no_display`: Display window not opened. Useful for servers and/or to slightly speed up OpenPose.
 - `--num_gpu 2 --num_gpu_start 1`: Parallelize over this number of GPUs starting by the desired device id. By default it uses all the available GPUs.
@@ -161,7 +161,7 @@ Each flag is divided into flag name, default value, and description.
 
 7. OpenPose Rendering
 - DEFINE_int32(part_to_show,              0,              "Prediction channel to visualize (default: 0). 0 for all the body parts, 1-18 for each body part heat map, 19 for the background heat map, 20 for all the body part heat maps together, 21 for all the PAFs, 22-40 for each body part pair PAF");
-- DEFINE_bool(disable_blending,           false,          "If blending is enabled, it will merge the results with the original frame. If disabled, it will only display the results on a black background.");
+- DEFINE_bool(disable_blending,           false,          "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black background, instead of being rendered into the original image. Related: `part_to_show`, `alpha_pose`, and `alpha_heatmap`.");
 
 8. OpenPose Rendering Pose
 - DEFINE_double(render_threshold,         0.05,           "Only estimated keypoints whose score confidences are higher than this threshold will be rendered. Generally, a high threshold (> 0.5) will only render very clear body parts; while small thresholds (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e. wrong detections).");
diff --git a/doc/release_notes.md b/doc/release_notes.md
index 459cc7d5..7018ad01 100644
--- a/doc/release_notes.md
+++ b/doc/release_notes.md
@@ -107,7 +107,7 @@ OpenPose Library - Release Notes
     1. Added CMake installer for Ubuntu.
     2. Added how to use keypoint data in `examples/tutorial_wrapper/`.
     3. Added flag for warnings of type `-Wsign-compare` and removed in code.
-    4. Slightly improved accuracy by considering ears-shoulder connection (e.g. 0.4 mAP for 1 scale in validation set).
+    4. Slightly improved accuracy by considering ears-shoulder connection (e.g. +0.4 mAP for 1 scale in validation set).
 2. Main bugs fixed:
     1. Windows version crashing with std::map copy.
 
diff --git a/examples/openpose/openpose.cpp b/examples/openpose/openpose.cpp
index e3c7c5d8..5b967201 100755
--- a/examples/openpose/openpose.cpp
+++ b/examples/openpose/openpose.cpp
@@ -119,8 +119,9 @@ DEFINE_bool(hand_tracking,              false,          "Adding hand tracking mi
 DEFINE_int32(part_to_show,              0,              "Prediction channel to visualize (default: 0). 0 for all the body parts, 1-18 for each body"
                                                         " part heat map, 19 for the background heat map, 20 for all the body part heat maps"
                                                         " together, 21 for all the PAFs, 22-40 for each body part pair PAF");
-DEFINE_bool(disable_blending,           false,          "If blending is enabled, it will merge the results with the original frame. If disabled, it"
-                                                        " will only display the results on a black background.");
+DEFINE_bool(disable_blending,           false,          "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black"
+                                                        " background, instead of being rendered into the original image. Related: `part_to_show`,"
+                                                        " `alpha_pose`, and `alpha_pose`.");
 // OpenPose Rendering Pose
 DEFINE_double(render_threshold,         0.05,           "Only estimated keypoints whose score confidences are higher than this threshold will be"
                                                         " rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
diff --git a/examples/tutorial_pose/1_extract_from_image.cpp b/examples/tutorial_pose/1_extract_from_image.cpp
index 48cbcbb9..e4c0ffa7 100644
--- a/examples/tutorial_pose/1_extract_from_image.cpp
+++ b/examples/tutorial_pose/1_extract_from_image.cpp
@@ -43,8 +43,9 @@ DEFINE_double(scale_gap,                0.3,            "Scale gap between scale
                                                         " `net_resolution` by your desired initial scale.");
 DEFINE_int32(scale_number,              1,              "Number of scales to average.");
 // OpenPose Rendering
-DEFINE_bool(disable_blending,           false,          "If blending is enabled, it will merge the results with the original frame. If disabled, it"
-                                                        " will only display the results on a black background.");
+DEFINE_bool(disable_blending,           false,          "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black"
+                                                        " background, instead of being rendered into the original image. Related: `part_to_show`,"
+                                                        " `alpha_pose`, and `alpha_pose`.");
 DEFINE_double(render_threshold,         0.05,           "Only estimated keypoints whose score confidences are higher than this threshold will be"
                                                         " rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
                                                         " while small thresholds (~0.1) will also output guessed and occluded keypoints, but also"
diff --git a/examples/tutorial_pose/2_extract_pose_or_heatmat_from_image.cpp b/examples/tutorial_pose/2_extract_pose_or_heatmat_from_image.cpp
index c04beb9b..f834d68e 100644
--- a/examples/tutorial_pose/2_extract_pose_or_heatmat_from_image.cpp
+++ b/examples/tutorial_pose/2_extract_pose_or_heatmat_from_image.cpp
@@ -46,8 +46,9 @@ DEFINE_int32(scale_number,              1,              "Number of scales to ave
 DEFINE_int32(part_to_show,              19,             "Prediction channel to visualize (default: 0). 0 for all the body parts, 1-18 for each body"
                                                         " part heat map, 19 for the background heat map, 20 for all the body part heat maps"
                                                         " together, 21 for all the PAFs, 22-40 for each body part pair PAF");
-DEFINE_bool(disable_blending,           false,          "If blending is enabled, it will merge the results with the original frame. If disabled, it"
-                                                        " will only display the results on a black background.");
+DEFINE_bool(disable_blending,           false,          "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black"
+                                                        " background, instead of being rendered into the original image. Related: `part_to_show`,"
+                                                        " `alpha_pose`, and `alpha_pose`.");
 DEFINE_double(render_threshold,         0.05,           "Only estimated keypoints whose score confidences are higher than this threshold will be"
                                                         " rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
                                                         " while small thresholds (~0.1) will also output guessed and occluded keypoints, but also"
diff --git a/examples/tutorial_wrapper/1_user_asynchronous.cpp b/examples/tutorial_wrapper/1_user_asynchronous.cpp
index 4552d9e2..db0d7b98 100644
--- a/examples/tutorial_wrapper/1_user_asynchronous.cpp
+++ b/examples/tutorial_wrapper/1_user_asynchronous.cpp
@@ -108,8 +108,9 @@ DEFINE_bool(hand_tracking,              false,          "Adding hand tracking mi
 DEFINE_int32(part_to_show,              0,              "Prediction channel to visualize (default: 0). 0 for all the body parts, 1-18 for each body"
                                                         " part heat map, 19 for the background heat map, 20 for all the body part heat maps"
                                                         " together, 21 for all the PAFs, 22-40 for each body part pair PAF");
-DEFINE_bool(disable_blending,           false,          "If blending is enabled, it will merge the results with the original frame. If disabled, it"
-                                                        " will only display the results on a black background.");
+DEFINE_bool(disable_blending,           false,          "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black"
+                                                        " background, instead of being rendered into the original image. Related: `part_to_show`,"
+                                                        " `alpha_pose`, and `alpha_pose`.");
 // OpenPose Rendering Pose
 DEFINE_double(render_threshold,         0.05,           "Only estimated keypoints whose score confidences are higher than this threshold will be"
                                                         " rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
diff --git a/examples/tutorial_wrapper/2_user_synchronous.cpp b/examples/tutorial_wrapper/2_user_synchronous.cpp
index 939588dc..0c893a1e 100644
--- a/examples/tutorial_wrapper/2_user_synchronous.cpp
+++ b/examples/tutorial_wrapper/2_user_synchronous.cpp
@@ -108,8 +108,9 @@ DEFINE_bool(hand_tracking,              false,          "Adding hand tracking mi
 DEFINE_int32(part_to_show,              0,              "Prediction channel to visualize (default: 0). 0 for all the body parts, 1-18 for each body"
                                                         " part heat map, 19 for the background heat map, 20 for all the body part heat maps"
                                                         " together, 21 for all the PAFs, 22-40 for each body part pair PAF");
-DEFINE_bool(disable_blending,           false,          "If blending is enabled, it will merge the results with the original frame. If disabled, it"
-                                                        " will only display the results on a black background.");
+DEFINE_bool(disable_blending,           false,          "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black"
+                                                        " background, instead of being rendered into the original image. Related: `part_to_show`,"
+                                                        " `alpha_pose`, and `alpha_pose`.");
 // OpenPose Rendering Pose
 DEFINE_double(render_threshold,         0.05,           "Only estimated keypoints whose score confidences are higher than this threshold will be"
                                                         " rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
diff --git a/examples_beta/openpose3d/openpose3d.cpp b/examples_beta/openpose3d/openpose3d.cpp
index 6b9d54b9..39e5fa4d 100644
--- a/examples_beta/openpose3d/openpose3d.cpp
+++ b/examples_beta/openpose3d/openpose3d.cpp
@@ -95,8 +95,9 @@ DEFINE_bool(hand_tracking,              false,          "Adding hand tracking mi
 DEFINE_int32(part_to_show,              0,              "Prediction channel to visualize (default: 0). 0 for all the body parts, 1-18 for each body"
                                                         " part heat map, 19 for the background heat map, 20 for all the body part heat maps"
                                                         " together, 21 for all the PAFs, 22-40 for each body part pair PAF");
-DEFINE_bool(disable_blending,           false,          "If blending is enabled, it will merge the results with the original frame. If disabled, it"
-                                                        " will only display the results on a black background.");
+DEFINE_bool(disable_blending,           false,          "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black"
+                                                        " background, instead of being rendered into the original image. Related: `part_to_show`,"
+                                                        " `alpha_pose`, and `alpha_pose`.");
 // OpenPose Rendering Pose
 DEFINE_double(render_threshold,         0.05,           "Only estimated keypoints whose score confidences are higher than this threshold will be"
                                                         " rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
diff --git a/src/openpose/core/nmsBase.cu b/src/openpose/core/nmsBase.cu
index ae6a4e20..c864d1eb 100644
--- a/src/openpose/core/nmsBase.cu
+++ b/src/openpose/core/nmsBase.cu
@@ -1,7 +1,7 @@
 #include <thrust/device_ptr.h>
 #include <thrust/scan.h>
 #include <openpose/utilities/cuda.hpp>
-#include <openpose/utilities/cuda.hpp>
+#include <openpose/utilities/cuda.hu>
 #include <openpose/core/nmsBase.hpp>
 
 namespace op
@@ -10,7 +10,8 @@ namespace op
     const auto THREADS_PER_BLOCK = 512u;
 
     template <typename T>
-    __global__ void nmsRegisterKernel(int* kernelPtr, const T* const sourcePtr, const int w, const int h, const T threshold)
+    __global__ void nmsRegisterKernel(int* kernelPtr, const T* const sourcePtr, const int w, const int h,
+                                      const T threshold)
     {
         // get pixel location (x,y)
         const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
@@ -22,16 +23,18 @@ namespace op
             const auto value = sourcePtr[index];
             if (value > threshold)
             {
-                const auto top_left        = sourcePtr[(y-1)*w + x-1];
-                const auto top             = sourcePtr[(y-1)*w + x];
-                const auto top_right       = sourcePtr[(y-1)*w + x+1];
-                const auto left            = sourcePtr[y*w + (x-1)];
-                const auto right           = sourcePtr[y*w + (x+1)];
-                const auto bottom_left     = sourcePtr[(y+1)*w + x-1];
-                const auto bottom          = sourcePtr[(y+1)*w + x];
-                const auto bottom_right    = sourcePtr[(y+1)*w + x+1];
-
-                if (value > top_left && value > top && value > top_right && value > left && value > right && value > bottom_left && value > bottom && value > bottom_right)
+                const auto topLeft     = sourcePtr[(y-1)*w + x-1];
+                const auto top         = sourcePtr[(y-1)*w + x];
+                const auto topRight    = sourcePtr[(y-1)*w + x+1];
+                const auto left        = sourcePtr[    y*w + x-1];
+                const auto right       = sourcePtr[    y*w + x+1];
+                const auto bottomLeft  = sourcePtr[(y+1)*w + x-1];
+                const auto bottom      = sourcePtr[(y+1)*w + x];
+                const auto bottomRight = sourcePtr[(y+1)*w + x+1];
+
+                if (value > topLeft && value > top && value > topRight
+                    && value > left && value > right
+                    && value > bottomLeft && value > bottom && value > bottomRight)
                     kernelPtr[index] = 1;
                 else
                     kernelPtr[index] = 0;
@@ -44,7 +47,8 @@ namespace op
     }
 
     template <typename T>
-    __global__ void writeResultKernel(T* output, const int length, const int* const kernelPtr, const T* const sourcePtr, const int width, const int height, const int maxPeaks)
+    __global__ void writeResultKernel(T* output, const int length, const int* const kernelPtr,
+                                      const T* const sourcePtr, const int width, const int height, const int maxPeaks)
     {
         __shared__ int local[THREADS_PER_BLOCK+1]; // one more
         const auto globalIdx = blockIdx.x * blockDim.x + threadIdx.x;
@@ -57,30 +61,33 @@ namespace op
                 local[threadIdx.x+1] = kernelPtr[globalIdx+1];
 
             __syncthreads();
-            // see difference, except the globally last one
+            // See difference, except the globally last one
             if (globalIdx != length - 1)
             {
+                // A[globalIdx] == A[globalIdx + 1] means no peak
                 if (local[threadIdx.x] != local[threadIdx.x + 1])
                 {
-                    //means A[globalIdx] == A[globalIdx + 1] as the kernelPtr[globalIdx]-th repeat
                     const auto peakIndex = kernelPtr[globalIdx]; //0-index
                     const auto peakLocX = (int)(globalIdx % width);
                     const auto peakLocY = (int)(globalIdx / width);
 
+                    // Accurate peak location: considered neighbors
                     if (peakIndex < maxPeaks) // limitation
                     {
                         T xAcc = 0.f;
                         T yAcc = 0.f;
                         T scoreAcc = 0.f;
-                        for (auto dy = -3 ; dy < 4 ; dy++)
+                        const auto dWidth = 3;
+                        const auto dHeight = 3;
+                        for (auto dy = -dHeight ; dy <= dHeight ; dy++)
                         {
                             const auto y = peakLocY + dy;
-                            if (0 <= y && y < height) // height = 368
+                            if (0 <= y && y < height) // Default height = 368
                             {
-                                for (auto dx = -3 ; dx < 4 ; dx++)
+                                for (auto dx = -dWidth ; dx <= dWidth ; dx++)
                                 {
                                     const auto x = peakLocX + dx;
-                                    if (0 <= x && x < width) // width = 656
+                                    if (0 <= x && x < width) // Default width = 656
                                     {
                                         const auto score = sourcePtr[y * width + x];
                                         if (score > 0)
@@ -107,7 +114,8 @@ namespace op
     }
 
     template <typename T>
-    void nmsGpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize)
+    void nmsGpu(T* targetPtr, int* kernelPtr, const T* const sourcePtr, const T threshold,
+                const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize)
     {
         try
         {
@@ -121,7 +129,8 @@ namespace op
             const auto offsetTarget = (maxPeaks+1)*targetSize[3];
 
             const dim3 threadsPerBlock2D{THREADS_PER_BLOCK_1D, THREADS_PER_BLOCK_1D};
-            const dim3 numBlocks2D{getNumberCudaBlocks(width, threadsPerBlock2D.x), getNumberCudaBlocks(height, threadsPerBlock2D.y)};
+            const dim3 numBlocks2D{getNumberCudaBlocks(width, threadsPerBlock2D.x),
+                                   getNumberCudaBlocks(height, threadsPerBlock2D.y)};
             const dim3 threadsPerBlock1D{THREADS_PER_BLOCK};
             const dim3 numBlocks1D{getNumberCudaBlocks(imageOffset, threadsPerBlock1D.x)};
             // log("num_b: " + std::to_string(bottom->shape(0)));       // = 1
@@ -146,8 +155,9 @@ namespace op
 
                     // This returns kernelPtrOffsetted, a binary array with 0s & 1s. 1s in the local maximum positions (size = size(sourcePtrOffsetted))
                     nmsRegisterKernel<<<numBlocks2D, threadsPerBlock2D>>>(kernelPtrOffsetted, sourcePtrOffsetted, width, height, threshold); //[0,0,0,0,1,0,0,0,0,1,0,0,0,0]
-                    //debug
-                    // if (c==3){
+                    // // Debug
+                    // if (c==3)
+                    // {
                     //     char filename[50];
                     //     sprintf(filename, "work%02d.txt", c);
                     //     std::ofstream fout(filename);
@@ -165,7 +175,9 @@ namespace op
                     thrust::exclusive_scan(kernelThrustPtr, kernelThrustPtr + imageOffset, kernelThrustPtr); //[0,0,0,0,0,1,1,1,1,1,2,2,2,2]
 
                     // This returns targetPtrOffsetted, with the NMS applied over it
-                    writeResultKernel<<<numBlocks1D, threadsPerBlock1D>>>(targetPtrOffsetted, imageOffset, kernelPtrOffsetted, sourcePtrOffsetted, width, height, maxPeaks);
+                    writeResultKernel<<<numBlocks1D, threadsPerBlock1D>>>(targetPtrOffsetted, imageOffset,
+                                                                          kernelPtrOffsetted, sourcePtrOffsetted,
+                                                                          width, height, maxPeaks);
                 }
             }
             cudaCheck(__LINE__, __FUNCTION__, __FILE__);
@@ -176,6 +188,8 @@ namespace op
         }
     }
 
-    template void nmsGpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize);
-    template void nmsGpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold, const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize);
+    template void nmsGpu(float* targetPtr, int* kernelPtr, const float* const sourcePtr, const float threshold,
+                         const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize);
+    template void nmsGpu(double* targetPtr, int* kernelPtr, const double* const sourcePtr, const double threshold,
+                         const std::array<int, 4>& targetSize, const std::array<int, 4>& sourceSize);
 }
-- 
GitLab