renderPose.cu 46.8 KB
Newer Older
1
#include <openpose/pose/renderPose.hpp>
G
gineshidalgo99 已提交
2
#include <openpose/gpu/cuda.hpp>
G
Gines Hidalgo 已提交
3
#include <openpose/pose/poseParameters.hpp>
4 5
#include <openpose_private/gpu/cuda.hu>
#include <openpose_private/utilities/render.hu>
G
gineshidalgo99 已提交
6 7 8

namespace op
{
G
gineshidalgo99 已提交
9 10
    // PI digits: http://www.piday.org/million/
    // The constant is a single-precision float, so only the leading digits of this literal are actually retained.
    // It is used by getColorXYAffinity to rescale the result of atan2.
    __constant__ const float PI = 3.14159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651328230664709384460955058223172535940812848111745f;
11
    // Keypoint pairs
    // One flat table per pose model, filled from the matching POSE_*_PAIRS_RENDER_GPU macro. The render kernels
    // compute the number of pairs as sizeof(table) / (2*sizeof(table[0])), i.e. 2 consecutive entries per pair.
    __constant__ const unsigned int BODY_25_PAIRS_GPU[] = {POSE_BODY_25_PAIRS_RENDER_GPU};
    __constant__ const unsigned int COCO_PAIRS_GPU[] = {POSE_COCO_PAIRS_RENDER_GPU};
    __constant__ const unsigned int BODY_19_PAIRS_GPU[] = {POSE_BODY_19_PAIRS_RENDER_GPU};
    __constant__ const unsigned int BODY_23_PAIRS_GPU[] = {POSE_BODY_23_PAIRS_RENDER_GPU};
    __constant__ const unsigned int BODY_25B_PAIRS_GPU[] = {POSE_BODY_25B_PAIRS_RENDER_GPU};
    __constant__ const unsigned int BODY_135_PAIRS_GPU[] = {POSE_BODY_135_PAIRS_RENDER_GPU};
    __constant__ const unsigned int MPI_PAIRS_GPU[] = {POSE_MPI_PAIRS_RENDER_GPU};
    __constant__ const unsigned int CAR_12_PAIRS_GPU[] = {POSE_CAR_12_PAIRS_RENDER_GPU};
    __constant__ const unsigned int CAR_22_PAIRS_GPU[] = {POSE_CAR_22_PAIRS_RENDER_GPU};
21
    // Keypoint scales
    // One table per pose model, filled from the matching POSE_*_SCALES_RENDER_GPU macro. The render kernels
    // compute the number of scales as sizeof(table) / sizeof(table[0]), i.e. 1 entry per scale.
    __constant__ const float BODY_25_SCALES[] = {POSE_BODY_25_SCALES_RENDER_GPU};
    __constant__ const float COCO_SCALES[] = {POSE_COCO_SCALES_RENDER_GPU};
    __constant__ const float BODY_19_SCALES[] = {POSE_BODY_19_SCALES_RENDER_GPU};
    __constant__ const float BODY_23_SCALES[] = {POSE_BODY_23_SCALES_RENDER_GPU};
    __constant__ const float BODY_25B_SCALES[] = {POSE_BODY_25B_SCALES_RENDER_GPU};
    __constant__ const float BODY_135_SCALES[] = {POSE_BODY_135_SCALES_RENDER_GPU};
    __constant__ const float MPI_SCALES[] = {POSE_MPI_SCALES_RENDER_GPU};
    __constant__ const float CAR_12_SCALES[] = {POSE_CAR_12_SCALES_RENDER_GPU};
    __constant__ const float CAR_22_SCALES[] = {POSE_CAR_22_SCALES_RENDER_GPU};
31
    // RGB colors
    // One flat table per pose model, filled from the matching POSE_*_COLORS_RENDER_GPU macro. The kernels compute
    // the number of colors as sizeof(table) / (3*sizeof(table[0])), i.e. 3 consecutive floats per color.
    __constant__ const float BODY_25_COLORS[] = {POSE_BODY_25_COLORS_RENDER_GPU};
    __constant__ const float COCO_COLORS[] = {POSE_COCO_COLORS_RENDER_GPU};
    __constant__ const float BODY_19_COLORS[] = {POSE_BODY_19_COLORS_RENDER_GPU};
    __constant__ const float BODY_23_COLORS[] = {POSE_BODY_23_COLORS_RENDER_GPU};
    __constant__ const float BODY_25B_COLORS[] = {POSE_BODY_25B_COLORS_RENDER_GPU};
    __constant__ const float BODY_135_COLORS[] = {POSE_BODY_135_COLORS_RENDER_GPU};
    __constant__ const float MPI_COLORS[] = {POSE_MPI_COLORS_RENDER_GPU};
    __constant__ const float CAR_12_COLORS[] = {POSE_CAR_12_COLORS_RENDER_GPU};
    __constant__ const float CAR_22_COLORS[] = {POSE_CAR_22_COLORS_RENDER_GPU};
G
gineshidalgo99 已提交
41 42 43



G
Gines Hidalgo 已提交
44
    // Converts a scalar into a 3-channel color using five piecewise-linear ramps.
    // - colorPtr: output array of 3 floats. Following the inline comments, [0] is B, [1] is G and [2] is R.
    // - v: value to convert; it is first limited to [vmin, vmax] with fastTruncateCuda.
    // - vmin, vmax: range that is mapped onto the full color ramp.
    // The value is normalized to t = (v - vmin) / (vmax - vmin) and t selects/drives the ramps, so the result is
    // consistent for any range. For the range (0, 1) t equals the truncated value, i.e. the output is unchanged
    // with respect to the previous formulas, which used the raw value and were only valid for that range.
    // An empty or inverted range (vmax <= vmin) is treated as t = 0.
    inline __device__ void getColorHeatMap(float* colorPtr, const float v, const float vmin, const float vmax)
    {
        const auto vTrunc = fastTruncateCuda(v, vmin, vmax);
        const auto dv = vmax - vmin;
        // Normalized position inside the range; guarded against division by zero
        const auto t = (dv > 0.f ? (vTrunc - vmin) / dv : 0.f);

        if (t < 0.125f)
        {
            colorPtr[0] = 256.f * (0.5f + (t * 4.f)); //B: 0.5 ~ 1
            colorPtr[1] = 0.f;
            colorPtr[2] = 0.f;
        }
        else if (t < 0.375f)
        {
            colorPtr[0] = 255.f;
            colorPtr[1] = 256.f * (t - 0.125f) * 4.f; //G: 0 ~ 1
            colorPtr[2] = 0.f;
        }
        else if (t < 0.625f)
        {
            colorPtr[0] = 256.f * (-4.f * t + 2.5f); //B: 1 ~ 0
            colorPtr[1] = 255.f;
            colorPtr[2] = 256.f * (4.f * (t - 0.375f)); // R: 0 ~ 1
        }
        else if (t < 0.875f)
        {
            colorPtr[0] = 0.f;
            colorPtr[1] = 256.f * (-4.f * t + 3.5f); //G: 1 ~ 0
            colorPtr[2] = 255.f;
        }
        else
        {
            colorPtr[0] = 0.f;
            colorPtr[1] = 0.f;
            colorPtr[2] = 256.f * (-4.f * t + 4.5f); //R: 1 ~ 0.5
        }
    }

G
Gines Hidalgo 已提交
81
    // Converts a scalar into a color by walking a 6-segment color wheel of 55 steps in total.
    // - colorPtr: output color, each component in [0, 255].
    //   NOTE(review): the segment names (RY, YG, GC, CB, BM, MR) suggest x = R, y = G, z = B - confirm with callers.
    // - v: value to convert; it is first limited to [vmin, vmax] with fastTruncateCuda.
    // - vmin, vmax: range that is mapped onto the whole wheel.
    // The value is normalized to [0, 1] before being scaled by the wheel length, so any range works. For the range
    // (0, 1), which is what getColorXYAffinity passes, the position on the wheel is the same as before.
    // An empty or inverted range (vmax <= vmin) is treated as position 0.
    inline __device__ void getColorAffinity(float3& colorPtr, const float v, const float vmin, const float vmax)
    {
        // Length of each segment of the wheel
        const auto RY = 15;
        const auto YG =  6;
        const auto GC =  4;
        const auto CB = 11;
        const auto BM = 13;
        const auto MR =  6;
        const auto summed = RY+YG+GC+CB+BM+MR;       // 55
        // Position on the wheel, in [0, summed]
        const auto dv = vmax - vmin;
        const auto vNorm = (dv > 0.f ? (fastTruncateCuda(v, vmin, vmax) - vmin) / dv : 0.f);
        const auto vTrunc = vNorm * summed;

        if (vTrunc < RY)
            colorPtr = {255.f,                              255.f*(vTrunc/(RY)),                0.f};
        else if (vTrunc < RY+YG)
            colorPtr = {255.f*(1-((vTrunc-RY)/(YG))),       255.f,                              0.f};
        else if (vTrunc < RY+YG+GC)
            // First component was `0.f * (...)`, which is always zero; written as a plain 0.f
            colorPtr = {0.f,                                255.f,                              255.f*((vTrunc-RY-YG)/(GC))};
        else if (vTrunc < RY+YG+GC+CB)
            colorPtr = {0.f,                                255.f*(1-((vTrunc-RY-YG-GC)/(CB))), 255.f};
        else if (vTrunc < summed-MR)
            colorPtr = {255.f*((vTrunc-RY-YG-GC-CB)/(BM)),  0.f,                                255.f};
        else if (vTrunc < summed)
            colorPtr = {255.f,                              0.f,                                255.f*(1-((vTrunc-RY-YG-GC-CB-BM)/(MR)))};
        else
            // Only reached when the position is exactly at the end of the wheel
            colorPtr = {255.f,                              0.f,                                0.f};
    }

    // Converts a 2-D vector (x, y) into a color: its direction picks the color through getColorAffinity and its
    // length, limited to 1, scales the 3 color components.
    inline __device__ void getColorXYAffinity(float3& colorPtr, const float x, const float y)
    {
        // Angle of (-x, -y) divided by PI, then shifted and halved so that it is expected to lie in [0, 1]
        const float angleOverPi = atan2(-y,-x)/PI;
        auto wheelPosition = (angleOverPi+1.f)/2.f; // 0 to 1
        // A NaN angle falls back to the start of the color wheel
        if (::isnan(wheelPosition))
            wheelPosition = 0.f;
        // Vector length, never above 1
        const auto magnitude = fastMinCuda(1.f, sqrt( x*x + y*y ) );

        getColorAffinity(colorPtr, wheelPosition, 0.f, 1.f);
        colorPtr.x *= magnitude;
        colorPtr.y *= magnitude;
        colorPtr.z *= magnitude;
    }

121 122 123 124 125 126 127 128
    // Kernel entry point that forwards all its arguments, unchanged and in the same order, to
    // getBoundingBoxPerPerson (not defined in this part of the file).
    // NOTE(review): presumably maxPtr, minPtr and scalePtr are per-person outputs that the renderPose* kernels
    // later receive - confirm against getBoundingBoxPerPerson and the host code.
    __global__ void getBoundingBoxPerPersonPose(
        float* maxPtr, float* minPtr, float* scalePtr,const int targetWidth, const int targetHeight,
        const float* const keypointsPtr, const int numberPeople, const int numberParts, const float threshold)
    {
        getBoundingBoxPerPerson(
            maxPtr, minPtr, scalePtr, targetWidth, targetHeight, keypointsPtr, numberPeople, numberParts, threshold);
    }

G
gineshidalgo99 已提交
129
    // Kernel that renders the COCO pose model (18 parts) by calling renderKeypoints with the COCO pair, color and
    // scale tables. It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically reserves
    // 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseCoco(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
        const float* const posePtr, const int numberPeople, const float threshold, const bool googlyEyes,
        const bool blendOriginalFrame, const float alphaColorToAdd)
    {
        // Block-shared buffers handed over to renderKeypoints
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Thread coordinates in the whole grid and linear thread index inside its block
        const auto column = threadIdx.x + (blockDim.x * blockIdx.x);
        const auto row = threadIdx.y + (blockDim.y * blockIdx.y);
        const auto indexInBlock = threadIdx.x + blockDim.x * threadIdx.y;

        // Table sizes: 2 entries per pair, 1 per scale, 3 per color
        const auto pairCount = sizeof(COCO_PAIRS_GPU) / (2*sizeof(COCO_PAIRS_GPU[0]));
        const auto scaleCount = sizeof(COCO_SCALES) / sizeof(COCO_SCALES[0]);
        const auto colorCount = sizeof(COCO_COLORS) / (3*sizeof(COCO_COLORS[0]));

        // Drawing sizes, proportional to the shortest side of the target
        const auto shortestSide = fastMinCuda(targetWidth, targetHeight);
        const auto radius = shortestSide / 100.f;
        const auto lineWidth = shortestSide / 120.f;

        // Keypoint indexes passed for the googly eyes option, -1 if it is disabled
        const auto eyeIndexA = (googlyEyes ? 14 : -1);
        const auto eyeIndexB = (googlyEyes ? 15 : -1);

        // Render key points
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
            indexInBlock, column, row, targetWidth, targetHeight, posePtr, COCO_PAIRS_GPU, numberPeople, 18,
            pairCount, COCO_COLORS, colorCount, radius, lineWidth, COCO_SCALES, scaleCount, threshold,
            alphaColorToAdd, blendOriginalFrame, eyeIndexA, eyeIndexB);
    }

G
gineshidalgo99 已提交
158
    // Kernel that renders the BODY_19 pose model (19 parts) by calling renderKeypoints with the BODY_19 pair, color
    // and scale tables. It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically
    // reserves 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseBody19(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
        const float* const posePtr, const int numberPeople, const float threshold, const bool googlyEyes,
        const bool blendOriginalFrame, const float alphaColorToAdd)
    {
        // Block-shared buffers handed over to renderKeypoints
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Thread coordinates in the whole grid and linear thread index inside its block
        const auto column = threadIdx.x + (blockDim.x * blockIdx.x);
        const auto row = threadIdx.y + (blockDim.y * blockIdx.y);
        const auto indexInBlock = threadIdx.x + blockDim.x * threadIdx.y;

        // Table sizes: 2 entries per pair, 1 per scale, 3 per color
        const auto pairCount = sizeof(BODY_19_PAIRS_GPU) / (2*sizeof(BODY_19_PAIRS_GPU[0]));
        const auto scaleCount = sizeof(BODY_19_SCALES) / sizeof(BODY_19_SCALES[0]);
        const auto colorCount = sizeof(BODY_19_COLORS) / (3*sizeof(BODY_19_COLORS[0]));

        // Drawing sizes, proportional to the shortest side of the target
        const auto shortestSide = fastMinCuda(targetWidth, targetHeight);
        const auto radius = shortestSide / 100.f;
        const auto lineWidth = shortestSide / 120.f;

        // Keypoint indexes passed for the googly eyes option, -1 if it is disabled
        const auto eyeIndexA = (googlyEyes ? 15 : -1);
        const auto eyeIndexB = (googlyEyes ? 16 : -1);

        // Render key points
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
            indexInBlock, column, row, targetWidth, targetHeight, posePtr, BODY_19_PAIRS_GPU, numberPeople, 19,
            pairCount, BODY_19_COLORS, colorCount, radius, lineWidth, BODY_19_SCALES, scaleCount, threshold,
            alphaColorToAdd, blendOriginalFrame, eyeIndexA, eyeIndexB);
    }

G
gineshidalgo99 已提交
188
    // Kernel that renders the BODY_23 pose model (23 parts) by calling renderKeypoints with the BODY_23 pair, color
    // and scale tables. It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically
    // reserves 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseBody23(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
        const float* const posePtr, const int numberPeople, const float threshold, const bool googlyEyes,
        const bool blendOriginalFrame, const float alphaColorToAdd)
    {
        // Block-shared buffers handed over to renderKeypoints
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Thread coordinates in the whole grid and linear thread index inside its block
        const auto column = threadIdx.x + (blockDim.x * blockIdx.x);
        const auto row = threadIdx.y + (blockDim.y * blockIdx.y);
        const auto indexInBlock = threadIdx.x + blockDim.x * threadIdx.y;

        // Table sizes: 2 entries per pair, 1 per scale, 3 per color
        const auto pairCount = sizeof(BODY_23_PAIRS_GPU) / (2*sizeof(BODY_23_PAIRS_GPU[0]));
        const auto scaleCount = sizeof(BODY_23_SCALES) / sizeof(BODY_23_SCALES[0]);
        const auto colorCount = sizeof(BODY_23_COLORS) / (3*sizeof(BODY_23_COLORS[0]));

        // Drawing sizes, proportional to the shortest side of the target
        const auto shortestSide = fastMinCuda(targetWidth, targetHeight);
        const auto radius = shortestSide / 100.f;
        const auto lineWidth = shortestSide / 120.f;

        // Keypoint indexes passed for the googly eyes option, -1 if it is disabled
        const auto eyeIndexA = (googlyEyes ? 13 : -1);
        const auto eyeIndexB = (googlyEyes ? 14 : -1);

        // Render key points
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
            indexInBlock, column, row, targetWidth, targetHeight, posePtr, BODY_23_PAIRS_GPU, numberPeople, 23,
            pairCount, BODY_23_COLORS, colorCount, radius, lineWidth, BODY_23_SCALES, scaleCount, threshold,
            alphaColorToAdd, blendOriginalFrame, eyeIndexA, eyeIndexB);
    }

217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
    // __global__ void renderPoseBody25Old(
    //     float* targetPtr, const int targetWidth, const int targetHeight, const float* const posePtr,
    //     const int numberPeople, const float threshold, const bool googlyEyes, const bool blendOriginalFrame,
    //     const float alphaColorToAdd)
    // {
    //     const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
    //     const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;
    //     const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;

    //     // Shared parameters
    //     __shared__ float2 sharedMins[POSE_MAX_PEOPLE];
    //     __shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
    //     __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

    //     // Other parameters
    //     const auto numberPartPairs = sizeof(BODY_25_PAIRS_GPU) / (2*sizeof(BODY_25_PAIRS_GPU[0]));
    //     const auto numberScales = sizeof(BODY_25_SCALES) / sizeof(BODY_25_SCALES[0]);
    //     const auto numberColors = sizeof(BODY_25_COLORS) / (3*sizeof(BODY_25_COLORS[0]));
    //     const auto radius = fastMinCuda(targetWidth, targetHeight) / 100.f;
    //     const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;

    //     // Render key points
    //     renderKeypointsOld(
    //         targetPtr, sharedMaxs, sharedMins, sharedScaleF, globalIdx, x, y, targetWidth, targetHeight, posePtr,
    //         BODY_25_PAIRS_GPU, numberPeople, 25, numberPartPairs, BODY_25_COLORS, numberColors, radius, lineWidth,
    //         BODY_25_SCALES, numberScales, threshold, alphaColorToAdd, blendOriginalFrame, (googlyEyes ? 15 : -1),
    //         (googlyEyes ? 16 : -1));
    // }

G
gineshidalgo99 已提交
246
    // Kernel that renders the BODY_25 pose model (25 parts) by calling renderKeypoints with the BODY_25 pair, color
    // and scale tables. It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically
    // reserves 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseBody25(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth,
        const int targetHeight, const float* const posePtr, const int numberPeople, const float threshold,
        const bool googlyEyes, const bool blendOriginalFrame, const float alphaColorToAdd)
    {
        // Block-shared buffers handed over to renderKeypoints
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Thread coordinates in the whole grid and linear thread index inside its block
        const auto column = threadIdx.x + (blockDim.x * blockIdx.x);
        const auto row = threadIdx.y + (blockDim.y * blockIdx.y);
        const auto indexInBlock = threadIdx.x + blockDim.x * threadIdx.y;

        // Table sizes: 2 entries per pair, 1 per scale, 3 per color
        const auto pairCount = sizeof(BODY_25_PAIRS_GPU) / (2*sizeof(BODY_25_PAIRS_GPU[0]));
        const auto scaleCount = sizeof(BODY_25_SCALES) / sizeof(BODY_25_SCALES[0]);
        const auto colorCount = sizeof(BODY_25_COLORS) / (3*sizeof(BODY_25_COLORS[0]));

        // Drawing sizes, proportional to the shortest side of the target
        const auto shortestSide = fastMinCuda(targetWidth, targetHeight);
        const auto radius = shortestSide / 100.f;
        const auto lineWidth = shortestSide / 120.f;

        // Keypoint indexes passed for the googly eyes option, -1 if it is disabled
        const auto eyeIndexA = (googlyEyes ? 15 : -1);
        const auto eyeIndexB = (googlyEyes ? 16 : -1);

        // Render key points
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
            indexInBlock, column, row, targetWidth, targetHeight, posePtr, BODY_25_PAIRS_GPU, numberPeople, 25,
            pairCount, BODY_25_COLORS, colorCount, radius, lineWidth, BODY_25_SCALES, scaleCount, threshold,
            alphaColorToAdd, blendOriginalFrame, eyeIndexA, eyeIndexB);
    }

G
gineshidalgo99 已提交
276
    // Kernel that renders the BODY_25B pose model (25 parts) by calling renderKeypoints with the BODY_25B pair,
    // color and scale tables. It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically
    // reserves 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseBody25b(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
        const float* const posePtr, const int numberPeople, const float threshold, const bool googlyEyes,
        const bool blendOriginalFrame, const float alphaColorToAdd)
    {
        // Block-shared buffers handed over to renderKeypoints
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Thread coordinates in the whole grid and linear thread index inside its block
        const auto column = threadIdx.x + (blockDim.x * blockIdx.x);
        const auto row = threadIdx.y + (blockDim.y * blockIdx.y);
        const auto indexInBlock = threadIdx.x + blockDim.x * threadIdx.y;

        // Table sizes: 2 entries per pair, 1 per scale, 3 per color
        const auto pairCount = sizeof(BODY_25B_PAIRS_GPU) / (2*sizeof(BODY_25B_PAIRS_GPU[0]));
        const auto scaleCount = sizeof(BODY_25B_SCALES) / sizeof(BODY_25B_SCALES[0]);
        const auto colorCount = sizeof(BODY_25B_COLORS) / (3*sizeof(BODY_25B_COLORS[0]));

        // Drawing sizes, proportional to the shortest side of the target
        const auto shortestSide = fastMinCuda(targetWidth, targetHeight);
        const auto radius = shortestSide / 100.f;
        const auto lineWidth = shortestSide / 120.f;

        // Keypoint indexes passed for the googly eyes option, -1 if it is disabled
        const auto eyeIndexA = (googlyEyes ? 1 : -1);
        const auto eyeIndexB = (googlyEyes ? 2 : -1);

        // Render key points
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
            indexInBlock, column, row, targetWidth, targetHeight, posePtr, BODY_25B_PAIRS_GPU, numberPeople, 25,
            pairCount, BODY_25B_COLORS, colorCount, radius, lineWidth, BODY_25B_SCALES, scaleCount, threshold,
            alphaColorToAdd, blendOriginalFrame, eyeIndexA, eyeIndexB);
    }

305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
    // __global__ void renderPoseBody135Old(
    //     float* targetPtr, const int targetWidth, const int targetHeight, const float* const posePtr,
    //     const int numberPeople, const float threshold, const bool googlyEyes, const bool blendOriginalFrame,
    //     const float alphaColorToAdd)
    // {
    //     const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
    //     const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;
    //     const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;

    //     // Shared parameters
    //     __shared__ float2 sharedMins[POSE_MAX_PEOPLE];
    //     __shared__ float2 sharedMaxs[POSE_MAX_PEOPLE];
    //     __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

    //     // Other parameters
    //     const auto numberPartPairs = sizeof(BODY_135_PAIRS_GPU) / (2*sizeof(BODY_135_PAIRS_GPU[0]));
    //     const auto numberScales = sizeof(BODY_135_SCALES) / sizeof(BODY_135_SCALES[0]);
    //     const auto numberColors = sizeof(BODY_135_COLORS) / (3*sizeof(BODY_135_COLORS[0]));
    //     const auto radius = fastMinCuda(targetWidth, targetHeight) / 100.f;
    //     const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;

    //     // Render key points
    //     renderKeypointsOld(
    //         targetPtr, sharedMaxs, sharedMins, sharedScaleF, globalIdx, x, y, targetWidth, targetHeight, posePtr,
    //         BODY_135_PAIRS_GPU, numberPeople, 135, numberPartPairs, BODY_135_COLORS, numberColors, radius, lineWidth,
    //         BODY_135_SCALES, numberScales, threshold, alphaColorToAdd, blendOriginalFrame, (googlyEyes ? 1 : -1),
    //         (googlyEyes ? 2 : -1));
    // }
333

G
gineshidalgo99 已提交
334
    // Kernel that renders the BODY_135 pose model (135 parts) by calling renderKeypoints with the BODY_135 pair,
    // color and scale tables. It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically
    // reserves 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseBody135(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
        const float* const posePtr, const int numberPeople, const float threshold, const bool googlyEyes,
        const bool blendOriginalFrame, const float alphaColorToAdd)
    {
        // Block-shared buffers handed over to renderKeypoints
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Thread coordinates in the whole grid and linear thread index inside its block
        const auto column = threadIdx.x + (blockDim.x * blockIdx.x);
        const auto row = threadIdx.y + (blockDim.y * blockIdx.y);
        const auto indexInBlock = threadIdx.x + blockDim.x * threadIdx.y;

        // Table sizes: 2 entries per pair, 1 per scale, 3 per color
        const auto pairCount = sizeof(BODY_135_PAIRS_GPU) / (2*sizeof(BODY_135_PAIRS_GPU[0]));
        const auto scaleCount = sizeof(BODY_135_SCALES) / sizeof(BODY_135_SCALES[0]);
        const auto colorCount = sizeof(BODY_135_COLORS) / (3*sizeof(BODY_135_COLORS[0]));

        // Drawing sizes, proportional to the shortest side of the target
        const auto shortestSide = fastMinCuda(targetWidth, targetHeight);
        const auto radius = shortestSide / 100.f;
        const auto lineWidth = shortestSide / 120.f;

        // Keypoint indexes passed for the googly eyes option, -1 if it is disabled
        const auto eyeIndexA = (googlyEyes ? 1 : -1);
        const auto eyeIndexB = (googlyEyes ? 2 : -1);

        // Render key points
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
            indexInBlock, column, row, targetWidth, targetHeight, posePtr, BODY_135_PAIRS_GPU, numberPeople, 135,
            pairCount, BODY_135_COLORS, colorCount, radius, lineWidth, BODY_135_SCALES, scaleCount, threshold,
            alphaColorToAdd, blendOriginalFrame, eyeIndexA, eyeIndexB);
    }

    // Kernel that renders the MPI pose model by calling renderKeypoints with 15 parts and the MPI pair, color and
    // scale tables. Unlike the other renderPose* kernels it has no googlyEyes parameter, so the trailing optional
    // arguments of renderKeypoints are left at their defaults.
    // It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically reserves
    // 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseMpi29Parts(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
        const float* const posePtr, const int numberPeople, const float threshold, const bool blendOriginalFrame,
        const float alphaColorToAdd)
    {
        const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
        const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;
        // Linear index of the thread inside its block
        const auto globalIdx = threadIdx.y * blockDim.x + threadIdx.x;

        // Shared parameters
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Other parameters
        const auto numberPartPairs = sizeof(MPI_PAIRS_GPU) / (2*sizeof(MPI_PAIRS_GPU[0]));
        const auto numberScales = sizeof(MPI_SCALES) / sizeof(MPI_SCALES[0]);
        const auto numberColors = sizeof(MPI_COLORS) / (3*sizeof(MPI_COLORS[0]));
        const auto radius = fastMinCuda(targetWidth, targetHeight) / 100.f;
        const auto lineWidth = fastMinCuda(targetWidth, targetHeight) / 120.f;

        // Render key points
        // The scale table must be MPI_SCALES, the one numberScales was computed from; COCO_SCALES was passed
        // here before, pairing the size of one table with the contents of another.
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr, globalIdx, x, y, targetWidth,
            targetHeight, posePtr, MPI_PAIRS_GPU, numberPeople, 15, numberPartPairs, MPI_COLORS, numberColors,
            radius, lineWidth, MPI_SCALES, numberScales, threshold, alphaColorToAdd, blendOriginalFrame);
    }

G
gineshidalgo99 已提交
391
    // Kernel that renders the CAR_12 model (12 parts) by calling renderKeypoints with the CAR_12 pair, color and
    // scale tables. It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically reserves
    // 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseCar12(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
        const float* const posePtr, const int numberPeople, const float threshold, const bool googlyEyes,
        const bool blendOriginalFrame, const float alphaColorToAdd)
    {
        // Block-shared buffers handed over to renderKeypoints
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Thread coordinates in the whole grid and linear thread index inside its block
        const auto column = threadIdx.x + (blockDim.x * blockIdx.x);
        const auto row = threadIdx.y + (blockDim.y * blockIdx.y);
        const auto indexInBlock = threadIdx.x + blockDim.x * threadIdx.y;

        // Table sizes: 2 entries per pair, 1 per scale, 3 per color
        const auto pairCount = sizeof(CAR_12_PAIRS_GPU) / (2*sizeof(CAR_12_PAIRS_GPU[0]));
        const auto scaleCount = sizeof(CAR_12_SCALES) / sizeof(CAR_12_SCALES[0]);
        const auto colorCount = sizeof(CAR_12_COLORS) / (3*sizeof(CAR_12_COLORS[0]));

        // Drawing sizes, proportional to the shortest side of the target
        const auto shortestSide = fastMinCuda(targetWidth, targetHeight);
        const auto radius = shortestSide / 100.f;
        const auto lineWidth = shortestSide / 120.f;

        // Keypoint indexes passed for the googly eyes option, -1 if it is disabled
        const auto eyeIndexA = (googlyEyes ? 4 : -1);
        const auto eyeIndexB = (googlyEyes ? 5 : -1);

        // Render key points
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
            indexInBlock, column, row, targetWidth, targetHeight, posePtr, CAR_12_PAIRS_GPU, numberPeople, 12,
            pairCount, CAR_12_COLORS, colorCount, radius, lineWidth, CAR_12_SCALES, scaleCount, threshold,
            alphaColorToAdd, blendOriginalFrame, eyeIndexA, eyeIndexB);
    }

G
gineshidalgo99 已提交
420
    // Kernel that renders the CAR_22 model (22 parts) by calling renderKeypoints with the CAR_22 pair, color and
    // scale tables. It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch, and it statically reserves
    // 5*POSE_MAX_PEOPLE floats of shared memory.
    __global__ void renderPoseCar22(
        float* targetPtr, float* minPtr, float* maxPtr, float* scalePtr, const int targetWidth, const int targetHeight,
        const float* const posePtr, const int numberPeople, const float threshold, const bool googlyEyes,
        const bool blendOriginalFrame, const float alphaColorToAdd)
    {
        // Block-shared buffers handed over to renderKeypoints
        __shared__ float sharedMins[2*POSE_MAX_PEOPLE];
        __shared__ float sharedMaxs[2*POSE_MAX_PEOPLE];
        __shared__ float sharedScaleF[POSE_MAX_PEOPLE];

        // Thread coordinates in the whole grid and linear thread index inside its block
        const auto column = threadIdx.x + (blockDim.x * blockIdx.x);
        const auto row = threadIdx.y + (blockDim.y * blockIdx.y);
        const auto indexInBlock = threadIdx.x + blockDim.x * threadIdx.y;

        // Table sizes: 2 entries per pair, 1 per scale, 3 per color
        const auto pairCount = sizeof(CAR_22_PAIRS_GPU) / (2*sizeof(CAR_22_PAIRS_GPU[0]));
        const auto scaleCount = sizeof(CAR_22_SCALES) / sizeof(CAR_22_SCALES[0]);
        const auto colorCount = sizeof(CAR_22_COLORS) / (3*sizeof(CAR_22_COLORS[0]));

        // Drawing sizes, proportional to the shortest side of the target
        const auto shortestSide = fastMinCuda(targetWidth, targetHeight);
        const auto radius = shortestSide / 100.f;
        const auto lineWidth = shortestSide / 120.f;

        // Keypoint indexes passed for the googly eyes option, -1 if it is disabled
        const auto eyeIndexA = (googlyEyes ? 6 : -1);
        const auto eyeIndexB = (googlyEyes ? 7 : -1);

        // Render key points
        renderKeypoints(
            targetPtr, sharedMaxs, sharedMins, sharedScaleF, maxPtr, minPtr, scalePtr,
            indexInBlock, column, row, targetWidth, targetHeight, posePtr, CAR_22_PAIRS_GPU, numberPeople, 22,
            pairCount, CAR_22_COLORS, colorCount, radius, lineWidth, CAR_22_SCALES, scaleCount, threshold,
            alphaColorToAdd, blendOriginalFrame, eyeIndexA, eyeIndexB);
    }

449 450 451 452
    // Kernel that overlays the heat maps of the first numberBodyParts channels on the target image.
    // Each thread with (x, y) inside targetWidth x targetHeight picks the heat map cell its pixel falls in, adds up
    // value * color over all the parts (value limited to [0, 1], color taken cyclically from COCO_COLORS) and
    // combines the result with the target pixel through addColorWeighted.
    // - targetPtr: target image, indexed as 3 interleaved floats per pixel.
    // - heatMapPtr: heat maps, one widthHeatMap x heightHeatMap plane per part, stored one after another.
    // - scaleToKeepRatio: factor used to convert target coordinates into heat map coordinates (division).
    // It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch.
    __global__ void renderBodyPartHeatMaps(float* targetPtr, const int targetWidth, const int targetHeight,
                                           const float* const heatMapPtr, const int widthHeatMap,
                                           const int heightHeatMap, const float scaleToKeepRatio,
                                           const int numberBodyParts, const float alphaColorToAdd)
    {
        const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
        const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;

        const auto numberColors = sizeof(COCO_COLORS)/(3*sizeof(COCO_COLORS[0]));

        if (x < targetWidth && y < targetHeight)
        {
            float rgbColor [3] = {0.f,0.f,0.f};
            const auto xSource = (x + 0.5f) / scaleToKeepRatio - 0.5f;
            const auto ySource = (y + 0.5f) / scaleToKeepRatio - 0.5f;
            // Last valid cell is size-1: the previous upper bounds (widthHeatMap, heightHeatMap) let the index
            // reach one past the end of a row/plane, assuming fastTruncateCuda limits to the closed range
            // [min, max] (as its use in getColorHeatMap suggests).
            // NOTE: 1e-5 is a double literal, so the sum is evaluated in double precision; kept as is in order
            // not to alter how the coordinates are rounded.
            const auto xHeatMap = fastTruncateCuda(int(xSource + 1e-5), 0, widthHeatMap - 1);
            const auto yHeatMap = fastTruncateCuda(int(ySource + 1e-5), 0, heightHeatMap - 1);
            const auto heatMapArea = widthHeatMap * heightHeatMap;
            // Signed counter, same type as numberBodyParts: with an unsigned one, a negative numberBodyParts
            // would be converted into a huge positive bound
            for (auto part = 0 ; part < numberBodyParts ; part++)
            {
                const auto offsetOrigin = part * heatMapArea;
                // __saturatef = truncate to [0,1]
                const auto value = __saturatef(heatMapPtr[offsetOrigin + yHeatMap*widthHeatMap + xHeatMap]);
                const auto rgbColorIndex = (part%numberColors)*3;
                rgbColor[0] += value*COCO_COLORS[rgbColorIndex];
                rgbColor[1] += value*COCO_COLORS[rgbColorIndex+1];
                rgbColor[2] += value*COCO_COLORS[rgbColorIndex+2];
            }

            const auto blueIndex = 3*(y * targetWidth + x);
            addColorWeighted(targetPtr[blueIndex+2], targetPtr[blueIndex+1], targetPtr[blueIndex], rgbColor,
                             alphaColorToAdd);
        }
    }

484 485
    // Kernel that overlays a single heat map channel (`part`) on the target image.
    // Each thread inside targetWidth x targetHeight interpolates the heat map at its pixel with bicubicInterpolate,
    // converts the value (or its absolute value if absValue is true) into a color with getColorHeatMap over the
    // range (0, 1) and combines it with the target pixel through addColorWeighted.
    // It reads blockIdx/threadIdx in x and y, so it expects a 2-D launch.
    __global__ void renderBodyPartHeatMap(float* targetPtr, const int targetWidth, const int targetHeight,
                                          const float* const heatMapPtr, const int widthHeatMap,
                                          const int heightHeatMap, const float scaleToKeepRatio, const unsigned int part,
                                          const float alphaColorToAdd, const bool absValue = false)
    {
        const auto column = (blockIdx.x * blockDim.x) + threadIdx.x;
        const auto row = (blockIdx.y * blockDim.y) + threadIdx.y;

        // Threads that fall outside the target image have nothing to do
        if (column >= targetWidth || row >= targetHeight)
            return;

        // Position of this pixel in heat map coordinates
        const auto xSource = (column + 0.5f) / scaleToKeepRatio - 0.5f;
        const auto ySource = (row + 0.5f) / scaleToKeepRatio - 0.5f;

        // Start of the plane that belongs to the requested part
        const auto* const partHeatMapPtr = heatMapPtr + part * widthHeatMap * heightHeatMap;
        const auto interpolatedValue = bicubicInterpolate(partHeatMapPtr, xSource, ySource, widthHeatMap,
                                                          heightHeatMap, widthHeatMap);

        // Value -> color
        float rgbColor[3];
        getColorHeatMap(rgbColor, (absValue ? fabsf(interpolatedValue) : interpolatedValue), 0.f, 1.f);

        // Target pixels are stored as 3 interleaved floats
        const auto blueIndex = 3*(row * targetWidth + column);
        addColorWeighted(targetPtr[blueIndex+2], targetPtr[blueIndex+1], targetPtr[blueIndex], rgbColor,
                         alphaColorToAdd);
    }

512 513 514 515
    // Kernel that colours each target pixel from one or more (x, y) channel pairs of the heat-map buffer and
    // blends the accumulated colour into the target image.
    // Parameters:
    //   targetPtr         image buffer, 3 floats per pixel, indexed as 3*(y*targetWidth + x) + {0, 1, 2}
    //   targetWidth/Height size of the target image in pixels; threads outside it do nothing
    //   heatMapPtr        channel-major buffer, each channel holding widthHeatMap * heightHeatMap floats
    //   scaleToKeepRatio  target-to-heat-map scale factor used to map a target pixel back to heat-map coordinates
    //   partsToRender     number of channel pairs to accumulate
    //   initPart          index of the first channel; pair k uses channels initPart + 2k (x) and
    //                     initPart + 2k + 1 (y)
    //   alphaColorToAdd   blending weight forwarded to addColorWeighted
    // Launch layout: one thread per target pixel on a 2D grid (x -> column, y -> row).
    __global__ void renderPartAffinities(float* targetPtr, const int targetWidth, const int targetHeight,
                                         const float* const heatMapPtr, const int widthHeatMap,
                                         const int heightHeatMap, const float scaleToKeepRatio,
                                         const int partsToRender, const int initPart, const float alphaColorToAdd)
    {
        const auto x = (blockIdx.x * blockDim.x) + threadIdx.x;
        const auto y = (blockIdx.y * blockDim.y) + threadIdx.y;

        if (x < targetWidth && y < targetHeight)
        {
            float rgbColor [3] = {0.f,0.f,0.f};
            const auto xSource = (x + 0.5f) / scaleToKeepRatio - 0.5f;
            const auto ySource = (y + 0.5f) / scaleToKeepRatio - 0.5f;
            const auto heatMapArea = widthHeatMap * heightHeatMap;

            // The sampling positions depend only on the pixel and the heat-map size, not on the channel, so
            // they are computed once per thread instead of once per rendered channel pair
            int xIntArray[4];
            int yIntArray[4];
            float dx;
            float dy;
            cubicSequentialData(xIntArray, yIntArray, dx, dy, xSource, ySource, widthHeatMap, heightHeatMap);
            const auto baseIndex = yIntArray[1]*widthHeatMap + xIntArray[1];

            for (auto part = initPart ; part < initPart + partsToRender*2 ; part += 2)
            {
                const auto offsetOriginX = part * heatMapArea;
                const auto offsetOriginY = (part+1) * heatMapArea;
                // Default: take the single sample at ([1], [1]) of each channel
                auto valueX = heatMapPtr[offsetOriginX + baseIndex];
                auto valueY = heatMapPtr[offsetOriginY + baseIndex];
                // Only when a single pair is rendered, blend the 2x2 neighbourhood spanned by entries [1] and
                // [2] of the index arrays, weighted by dx and dy
                if (partsToRender == 1)
                {
                    const auto xB = heatMapPtr[offsetOriginX + yIntArray[1]*widthHeatMap + xIntArray[2]];
                    const auto xC = heatMapPtr[offsetOriginX + yIntArray[2]*widthHeatMap + xIntArray[1]];
                    const auto xD = heatMapPtr[offsetOriginX + yIntArray[2]*widthHeatMap + xIntArray[2]];
                    valueX = (1-dx)*(1-dy)*valueX
                           + dx*(1-dy)*xB
                           + (1-dx)*dy*xC
                           + dx*dy*xD;
                    const auto yB = heatMapPtr[offsetOriginY + yIntArray[1]*widthHeatMap + xIntArray[2]];
                    const auto yC = heatMapPtr[offsetOriginY + yIntArray[2]*widthHeatMap + xIntArray[1]];
                    const auto yD = heatMapPtr[offsetOriginY + yIntArray[2]*widthHeatMap + xIntArray[2]];
                    valueY = (1-dx)*(1-dy)*valueY
                           + dx*(1-dy)*yB
                           + (1-dx)*dy*yC
                           + dx*dy*yD;
                }

                float3 rgbColor2;
                // Disabled alternative kept for reference (normalises the vector before colouring):
                // if (forceNorm1)
                // {
                //     const auto norm = std::sqrt(valueX*valueX + valueY*valueY);
                //     if (norm > 0.05f)
                //         getColorXYAffinity(rgbColor2, valueX/norm, valueY/norm);
                //     else
                //         getColorXYAffinity(rgbColor2, valueX, valueY);
                // }
                // else
                getColorXYAffinity(rgbColor2, valueX, valueY);
                // Colours of all rendered pairs are summed before blending
                rgbColor[0] += rgbColor2.x;
                rgbColor[1] += rgbColor2.y;
                rgbColor[2] += rgbColor2.z;
            }

            // The pixel's three floats are handed over in the order +2, +1, +0
            // (presumably because the image is stored as BGR -- confirm against addColorWeighted)
            const auto blueIndex = 3*(y * targetWidth + x);
            addColorWeighted(targetPtr[blueIndex+2], targetPtr[blueIndex+1], targetPtr[blueIndex], rgbColor,
                             alphaColorToAdd);
        }
    }

G
gineshidalgo99 已提交
579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
    // Kernel that renders a single channel located after the body-part, extra (+1) and PAF channels of the
    // heat-map buffer. The interpolated value v is remapped to 0.5 + 0.5 * v, converted to a colour with
    // getColorHeatMap over the range [0, 1] and blended into the target image.
    // Parameters:
    //   targetPtr              image buffer, 3 floats per pixel
    //   targetWidth/Height     size of the target image in pixels
    //   heatMapPtr             channel-major buffer, widthHeatMap * heightHeatMap floats per channel
    //   scaleToKeepRatio       target-to-heat-map scale factor
    //   part                   channel index relative to the first channel after the skipped ones
    //   numberBodyParts,
    //   numberBodyPAFChannels  together with one extra channel, the number of channels skipped
    //   alphaColorToAdd        blending weight forwarded to addColorWeighted
    // Launch layout: one thread per target pixel on a 2D grid.
    __global__ void renderDistance(float* targetPtr, const int targetWidth, const int targetHeight,
                                   const float* const heatMapPtr, const int widthHeatMap, const int heightHeatMap,
                                   const float scaleToKeepRatio, const int part, const int numberBodyParts,
                                   const int numberBodyPAFChannels, const float alphaColorToAdd)
    {
        const auto column = (blockIdx.x * blockDim.x) + threadIdx.x;
        const auto row = (blockIdx.y * blockDim.y) + threadIdx.y;

        // Threads mapped outside the target image have nothing to draw
        if (column >= targetWidth || row >= targetHeight)
            return;

        // Start of the requested channel inside the heat-map buffer
        const auto channelOffset = part * widthHeatMap * heightHeatMap
                                 + (numberBodyParts+1+numberBodyPAFChannels)*widthHeatMap * heightHeatMap;
        const auto* const channelPtr = heatMapPtr + channelOffset;

        // Position of this pixel in heat-map coordinates
        const auto sourceX = (column + 0.5f) / scaleToKeepRatio - 0.5f;
        const auto sourceY = (row + 0.5f) / scaleToKeepRatio - 0.5f;

        // Sample the channel and remap the result with 0.5 + 0.5 * value
        const auto remappedValue = 0.5f
                                 + 0.5f * bicubicInterpolate(channelPtr, sourceX, sourceY,
                                                             widthHeatMap, heightHeatMap, widthHeatMap);

        float heatColor[3];
        getColorHeatMap(heatColor, remappedValue, 0.f, 1.f);

        // The pixel's three floats are handed over in the order +2, +1, +0
        // (presumably because the image is stored as BGR -- confirm against addColorWeighted)
        const auto pixelBase = 3*(row * targetWidth + column);
        addColorWeighted(targetPtr[pixelBase+2], targetPtr[pixelBase+1], targetPtr[pixelBase], heatColor,
                         alphaColorToAdd);
    }

G
gineshidalgo99 已提交
607 608 609 610 611 612
    // Reports an error through error() when the blending weight is below 0 or above 1.
    inline void checkAlpha(const float alphaColorToAdd)
    {
        const bool belowRange = alphaColorToAdd < 0.f;
        const bool aboveRange = alphaColorToAdd > 1.f;
        if (belowRange || aboveRange)
            error("Alpha must be in the range [0, 1].", __LINE__, __FUNCTION__, __FILE__);
    }

G
gineshidalgo99 已提交
613
    // Common launcher behind renderPosePAFGpu and renderPosePAFsGpu: validates the blending weight, obtains the
    // launch dimensions for the frame and runs the renderPartAffinities kernel.
    // Buffer layout notes carried over from the original comments:
    //   framePtr   = width * height * 3
    //   heatMapPtr = heatMapSize.x * heatMapSize.y * #body parts
    // `part` is the first channel to render and `partsToRender` the number of channel pairs.
    // Any std::exception raised inside is forwarded to error().
    inline void renderPosePAFGpuAux(float* framePtr, const PoseModel poseModel, const Point<int>& frameSize,
                                    const float* const heatMapPtr, const Point<int>& heatMapSize,
                                    const float scaleToKeepRatio, const int part, const int partsToRender,
                                    const float alphaBlending)
    {
        try
        {
            checkAlpha(alphaBlending);

            // NOTE(review): this value is not read anywhere below. It is kept because
            // getPoseNumberBodyParts may reject an invalid poseModel -- confirm before removing.
            const auto heatMapOffset = getPoseNumberBodyParts(poseModel) * heatMapSize.area();

            // NOTE(review): the dim3 named "threads" is passed in the first (grid) slot of the launch and the
            // one named "blocks" in the second (block) slot -- confirm against getNumberCudaThreadsAndBlocks
            // before changing the order.
            dim3 cudaThreads;
            dim3 cudaBlocks;
            getNumberCudaThreadsAndBlocks(cudaThreads, cudaBlocks, frameSize);

            renderPartAffinities<<<cudaThreads, cudaBlocks>>>(
                framePtr, frameSize.x, frameSize.y, heatMapPtr, heatMapSize.x, heatMapSize.y, scaleToKeepRatio,
                partsToRender, part, alphaBlending);
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

638 639 640 641
    // Host entry point that renders the keypoints of every detected person on the GPU.
    // Steps: validate the arguments, launch getBoundingBoxPerPersonPose to fill maxPtr / minPtr / scalePtr, then
    // launch the rendering kernel that matches poseModel, and finally call cudaCheck.
    // Buffer layout notes carried over from the original comments:
    //   framePtr = width * height * 3
    //   posePtr  = 3 (x,y,score) * #Body parts * numberPeople
    // No GPU work is issued when numberPeople <= 0 and blendOriginalFrame is true.
    // Any std::exception raised inside is forwarded to error().
    void renderPoseKeypointsGpu(
        float* framePtr, float* maxPtr, float* minPtr, float* scalePtr, const PoseModel poseModel,
        const int numberPeople, const Point<int>& frameSize, const float* const posePtr,
        const float renderThreshold, const bool googlyEyes, const bool blendOriginalFrame, const float alphaBlending)
    {
        try
        {
            // Early exit: nobody to draw and blendOriginalFrame is set
            if (numberPeople <= 0 && blendOriginalFrame)
                return;

            // Argument validation
            const bool isMpiModel = (poseModel == PoseModel::MPI_15 || poseModel == PoseModel::MPI_15_4);
            if (googlyEyes && isMpiModel)
                error("Bool googlyEyes not compatible with MPI models.",
                      __LINE__, __FUNCTION__, __FILE__);
            if (numberPeople > POSE_MAX_PEOPLE)
                error("Rendering assumes that numberPeople <= POSE_MAX_PEOPLE = " + std::to_string(POSE_MAX_PEOPLE)
                      + ".", __LINE__, __FUNCTION__, __FILE__);

            // Launch dimensions for the per-pixel rendering kernels
            // NOTE(review): the dim3 named "threads" is passed in the first (grid) slot of every launch and the
            // one named "blocks" in the second (block) slot -- confirm against getNumberCudaThreadsAndBlocks
            // before changing the order.
            dim3 cudaThreads;
            dim3 cudaBlocks;
            getNumberCudaThreadsAndBlocks(cudaThreads, cudaBlocks, frameSize);

            // Per-person bounding boxes, needed by all the rendering kernels below
            const dim3 boundBoxThreads = {1, 1, 1};
            const dim3 boundBoxBlocks{getNumberCudaBlocks(POSE_MAX_PEOPLE, boundBoxThreads.x)};
            getBoundingBoxPerPersonPose<<<boundBoxThreads, boundBoxBlocks>>>(
                maxPtr, minPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                getPoseNumberBodyParts(poseModel), renderThreshold);

            // Model-specific rendering kernel
            switch (poseModel)
            {
                // Body pose
                case PoseModel::BODY_25:
                case PoseModel::BODY_25D:
                case PoseModel::BODY_25E:
                    // Profiling aid (disabled): wrap the launch below in OP_CUDA_PROFILE_INIT(REPS) /
                    // OP_CUDA_PROFILE_END(time, 1e3, REPS) and do the same with
                    // renderPoseBody25Old<<<...>>>(framePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                    // renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending) to compare both versions.
                    renderPoseBody25<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending);
                    break;
                case PoseModel::COCO_18:
                    renderPoseCoco<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending);
                    break;
                case PoseModel::BODY_19:
                case PoseModel::BODY_19E:
                case PoseModel::BODY_19N:
                case PoseModel::BODY_19_X2:
                    renderPoseBody19<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending);
                    break;
                case PoseModel::BODY_23:
                    renderPoseBody23<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending);
                    break;
                case PoseModel::BODY_25B:
                    renderPoseBody25b<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending);
                    break;
                case PoseModel::BODY_135:
                    // Profiling aid (disabled): same scheme as for BODY_25, comparing against
                    // renderPoseBody135Old<<<...>>> with REPS = 500.
                    renderPoseBody135<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending);
                    break;
                case PoseModel::MPI_15:
                case PoseModel::MPI_15_4:
                    // The MPI kernel takes no googlyEyes argument
                    renderPoseMpi29Parts<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, blendOriginalFrame, alphaBlending);
                    break;
                // Car pose
                case PoseModel::CAR_12:
                    renderPoseCar12<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending);
                    break;
                case PoseModel::CAR_22:
                    renderPoseCar22<<<cudaThreads, cudaBlocks>>>(
                        framePtr, minPtr, maxPtr, scalePtr, frameSize.x, frameSize.y, posePtr, numberPeople,
                        renderThreshold, googlyEyes, blendOriginalFrame, alphaBlending);
                    break;
                // Unknown
                default:
                    error("Invalid Model.", __LINE__, __FUNCTION__, __FILE__);
                    break;
            }
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

G
Gines Hidalgo 已提交
779 780 781
    // Host entry point that renders a single heat-map channel (`part`) over the frame by launching the
    // renderBodyPartHeatMap kernel with its default absValue argument.
    // Buffer layout notes carried over from the original comments:
    //   framePtr   = width * height * 3
    //   heatMapPtr = heatMapSize.x * heatMapSize.y * #body parts
    // alphaBlending is validated with checkAlpha. Any std::exception raised inside is forwarded to error().
    void renderPoseHeatMapGpu(float* framePtr, const Point<int>& frameSize, const float* const heatMapPtr,
                              const Point<int>& heatMapSize, const float scaleToKeepRatio, const unsigned int part,
                              const float alphaBlending)
    {
        try
        {
            checkAlpha(alphaBlending);

            // NOTE(review): the dim3 named "threads" is passed in the first (grid) slot of the launch and the
            // one named "blocks" in the second (block) slot -- confirm against getNumberCudaThreadsAndBlocks
            // before changing the order.
            dim3 cudaThreads;
            dim3 cudaBlocks;
            getNumberCudaThreadsAndBlocks(cudaThreads, cudaBlocks, frameSize);

            renderBodyPartHeatMap<<<cudaThreads, cudaBlocks>>>(
                framePtr, frameSize.x, frameSize.y, heatMapPtr, heatMapSize.x, heatMapSize.y, scaleToKeepRatio,
                part, alphaBlending);
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

804 805 806
    // Host entry point that renders the body-part heat maps of `poseModel` over the frame by launching the
    // renderBodyPartHeatMaps kernel with the number of body parts of the model.
    // Buffer layout notes carried over from the original comments:
    //   framePtr   = width * height * 3
    //   heatMapPtr = heatMapSize.x * heatMapSize.y * #body parts
    // alphaBlending is validated with checkAlpha. Any std::exception raised inside is forwarded to error().
    void renderPoseHeatMapsGpu(float* framePtr, const PoseModel poseModel, const Point<int>& frameSize,
                               const float* const heatMapPtr, const Point<int>& heatMapSize,
                               const float scaleToKeepRatio, const float alphaBlending)
    {
        try
        {
            checkAlpha(alphaBlending);
            // NOTE(review): `threadsPerBlock` is passed in the first (grid) slot of the launch and `numBlocks`
            // in the second (block) slot -- confirm against getNumberCudaThreadsAndBlocks before changing it.
            dim3 threadsPerBlock;
            dim3 numBlocks;
            getNumberCudaThreadsAndBlocks(threadsPerBlock, numBlocks, frameSize);
            const auto numberBodyParts = getPoseNumberBodyParts(poseModel);
            // (The former local `heatMapOffset` was computed here but never read, so it has been removed.)

            renderBodyPartHeatMaps<<<threadsPerBlock, numBlocks>>>(
                framePtr, frameSize.x, frameSize.y, heatMapPtr, heatMapSize.x, heatMapSize.y, scaleToKeepRatio,
                numberBodyParts, alphaBlending
            );
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

831 832 833
    // Host entry point that renders exactly one channel pair, starting at channel `part`, by delegating to
    // renderPosePAFGpuAux. Any std::exception raised inside is forwarded to error().
    void renderPosePAFGpu(
        float* framePtr, const PoseModel poseModel, const Point<int>& frameSize, const float* const heatMapPtr,
        const Point<int>& heatMapSize, const float scaleToKeepRatio, const int part, const float alphaBlending)
    {
        try
        {
            // A single pair is requested
            const int singlePair = 1;
            renderPosePAFGpuAux(
                framePtr, poseModel, frameSize, heatMapPtr, heatMapSize, scaleToKeepRatio, part, singlePair,
                alphaBlending);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

846 847 848
    // Host entry point that renders all channel pairs of `poseModel` by delegating to renderPosePAFGpuAux.
    // The number of pairs is half the size of getPosePartPairs(poseModel); the first channel is the number of
    // body parts, plus one when addBkgChannel(poseModel) is true.
    // Any std::exception raised inside is forwarded to error().
    void renderPosePAFsGpu(
        float* framePtr, const PoseModel poseModel, const Point<int>& frameSize, const float* const heatMapPtr,
        const Point<int>& heatMapSize, const float scaleToKeepRatio, const float alphaBlending)
    {
        try
        {
            // Two entries of the pair list describe one pair
            const auto pairCount = static_cast<int>(getPosePartPairs(poseModel).size()) / 2;
            // Channels before the first one to render: body parts and, optionally, one extra channel
            const auto firstChannel = getPoseNumberBodyParts(poseModel) + (addBkgChannel(poseModel) ? 1 : 0);
            renderPosePAFGpuAux(
                framePtr, poseModel, frameSize, heatMapPtr, heatMapSize, scaleToKeepRatio, firstChannel,
                pairCount, alphaBlending);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }
G
gineshidalgo99 已提交
863

864 865 866
    // Host entry point that renders channel `part` of the heat-map buffer as a body-part heat map, asking the
    // renderBodyPartHeatMap kernel to colour the absolute value of each sample.
    // Buffer layout notes carried over from the original comments:
    //   framePtr   = width * height * 3
    //   heatMapPtr = heatMapSize.x * heatMapSize.y * #body parts
    // alphaBlending is validated with checkAlpha. Any std::exception raised inside is forwarded to error().
    void renderPoseDistanceGpu(
        float* framePtr, const Point<int>& frameSize, const float* const heatMapPtr, const Point<int>& heatMapSize,
        const float scaleToKeepRatio, const unsigned int part, const float alphaBlending)
    {
        try
        {
            // Disabled alternative kept for reference (render as PAF):
            // const bool forceNorm1 = true;
            // renderPosePAFGpuAux(framePtr, poseModel, frameSize, heatMapPtr, heatMapSize, scaleToKeepRatio, part, 1,
            //                     alphaBlending, forceNorm1);

            // Active path: render as body part
            checkAlpha(alphaBlending);

            // NOTE(review): the dim3 named "threads" is passed in the first (grid) slot of the launch and the
            // one named "blocks" in the second (block) slot -- confirm against getNumberCudaThreadsAndBlocks
            // before changing the order.
            dim3 cudaThreads;
            dim3 cudaBlocks;
            getNumberCudaThreadsAndBlocks(cudaThreads, cudaBlocks, frameSize);

            // The kernel colours the absolute value of each sample when this flag is set
            const auto useAbsoluteValue = true;
            renderBodyPartHeatMap<<<cudaThreads, cudaBlocks>>>(
                framePtr, frameSize.x, frameSize.y, heatMapPtr, heatMapSize.x, heatMapSize.y, scaleToKeepRatio,
                part, alphaBlending, useAbsoluteValue);
            cudaCheck(__LINE__, __FUNCTION__, __FILE__);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }
G
gineshidalgo99 已提交
894
}