faceGpuRenderer.cpp 4.4 KB
Newer Older
1
#ifdef USE_CUDA
G
gineshidalgo99 已提交
2 3 4 5
    #include <cuda.h>
    #include <cuda_runtime_api.h>
#endif
#include <openpose/face/renderFace.hpp>
G
gineshidalgo99 已提交
6
#include <openpose/gpu/cuda.hpp>
7
#include <openpose/face/faceGpuRenderer.hpp>
8 9 10

namespace op
{
11 12
    FaceGpuRenderer::FaceGpuRenderer(const float renderThreshold, const float alphaKeypoint,
                                     const float alphaHeatMap) :
13 14 15 16 17
        GpuRenderer{renderThreshold, alphaKeypoint, alphaHeatMap},
        pGpuFace{nullptr},
        pMaxPtr{nullptr},
        pMinPtr{nullptr},
        pScalePtr{nullptr}
18 19 20
    {
    }

21
    // Destructor. When built with USE_CUDA, it releases every GPU buffer this renderer still holds
    // and resets the corresponding pointers to null. No exception leaves the destructor: any
    // std::exception raised inside is reported through errorDestructor instead.
    FaceGpuRenderer::~FaceGpuRenderer()
    {
        try
        {
            #ifdef USE_CUDA
                // cudaFree is only issued for buffers that were actually allocated
                const auto freeIfAllocated = [](void* const devicePtr)
                {
                    if (devicePtr != nullptr)
                        cudaFree(devicePtr);
                };
                // CUDA status check before releasing anything
                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
                freeIfAllocated(pGpuFace);
                pGpuFace = nullptr;
                freeIfAllocated(pMaxPtr);
                pMaxPtr = nullptr;
                freeIfAllocated(pMinPtr);
                pMinPtr = nullptr;
                freeIfAllocated(pScalePtr);
                pScalePtr = nullptr;
                // CUDA status check after the releases
                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            #endif
        }
        catch (const std::exception& e)
        {
            errorDestructor(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

57
    void FaceGpuRenderer::initializationOnThread()
58 59 60
    {
        try
        {
G
gineshidalgo99 已提交
61 62
            log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
            // GPU memory allocation for rendering
63
            #ifdef USE_CUDA
G
gineshidalgo99 已提交
64
                cudaMalloc((void**)(&pGpuFace), POSE_MAX_PEOPLE * FACE_NUMBER_PARTS * 3 * sizeof(float));
65 66 67
                cudaMalloc((void**)&pMaxPtr, sizeof(float) * 2 * FACE_NUMBER_PARTS);
                cudaMalloc((void**)&pMinPtr, sizeof(float) * 2 * FACE_NUMBER_PARTS);
                cudaMalloc((void**)&pScalePtr, sizeof(float) * FACE_NUMBER_PARTS);
G
gineshidalgo99 已提交
68 69
            #endif
            log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
70 71 72 73 74 75 76
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

77
    // Renders the face keypoints onto outputData using the GPU.
    // - outputData: frame to draw on; its sizes along dimensions 1 and 0 are passed to the render
    //   call as the frame size (x, y).
    // - faceKeypoints: keypoints to draw; the size of dimension 0 is the number of faces.
    // Keypoints are only drawn when there is at least one face and the element selected through
    // spElementToRender is 0. The final GPU-to-CPU step runs in every case.
    // Without USE_CUDA this function only reports an error. Any std::exception raised inside is
    // forwarded to error().
    void FaceGpuRenderer::renderFaceInherited(Array<float>& outputData, const Array<float>& faceKeypoints)
    {
        try
        {
            // GPU rendering
            #ifdef USE_CUDA
                // Read the currently selected element once
                const auto elementRendered = spElementToRender->load();
                const auto numberPeople = faceKeypoints.getSize(0);
                const Point<int> frameSize{outputData.getSize(1), outputData.getSize(0)};
                if (numberPeople > 0 && elementRendered == 0)
                {
                    // pGpuFace is allocated in initializationOnThread for POSE_MAX_PEOPLE faces.
                    // Copying more than that would write past the end of the device buffer, so
                    // report it instead. numberPeople is known to be positive here, so the
                    // unsigned comparison is safe.
                    if (static_cast<unsigned long long>(numberPeople)
                        > static_cast<unsigned long long>(POSE_MAX_PEOPLE))
                    {
                        error("Number of faces to render exceeds POSE_MAX_PEOPLE, the capacity the GPU"
                              " keypoint buffer was allocated for.", __LINE__, __FUNCTION__, __FILE__);
                    }
                    else
                    {
                        // Draw faceKeypoints
                        cpuToGpuMemoryIfNotCopiedYet(outputData.getPtr(), outputData.getVolume());
                        cudaMemcpy(pGpuFace, faceKeypoints.getConstPtr(),
                                   numberPeople * FACE_NUMBER_PARTS * 3 * sizeof(float),
                                   cudaMemcpyHostToDevice);
                        renderFaceKeypointsGpu(
                            *spGpuMemory, pMaxPtr, pMinPtr, pScalePtr, frameSize, pGpuFace, numberPeople,
                            mRenderThreshold, getAlphaKeypoint());
                        // CUDA check
                        cudaCheck(__LINE__, __FUNCTION__, __FILE__);
                    }
                }
                // GPU memory to CPU if last renderer
                gpuToCpuMemoryIfLastRenderer(outputData.getPtr(), outputData.getVolume());
                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            #else
                UNUSED(outputData);
                UNUSED(faceKeypoints);
                error("OpenPose must be compiled with the `USE_CUDA` macro definitions in order to run this"
                      " functionality.", __LINE__, __FUNCTION__, __FILE__);
            #endif
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }
}