// poseGpuRenderer.cpp
#include <algorithm> // std::min
#ifdef USE_CUDA
    #include <cuda.h>
    #include <cuda_runtime_api.h>
#endif
#include <openpose/pose/poseParameters.hpp>
#include <openpose/pose/renderPose.hpp>
#include <openpose/gpu/cuda.hpp>
#include <openpose/utilities/keypoint.hpp>
#include <openpose/pose/poseGpuRenderer.hpp>

namespace op
{
    PoseGpuRenderer::PoseGpuRenderer(const PoseModel poseModel,
                                     const std::shared_ptr<PoseExtractorNet>& poseExtractorNet,
15 16
                                     const float renderThreshold, const bool blendOriginalFrame,
                                     const float alphaKeypoint, const float alphaHeatMap,
17
                                     const unsigned int elementToRender) :
18 19
        // #body elements to render = #body parts (size()) + #body part pair connections
        //                          + 3 (+whole pose +whole heatmaps +PAFs)
20 21
        // POSE_BODY_PART_MAPPING crashes on Windows, replaced by getPoseBodyPartMapping
        GpuRenderer{renderThreshold, alphaKeypoint, alphaHeatMap, blendOriginalFrame, elementToRender,
22
                    getNumberElementsToRender(poseModel)}, // mNumberElementsToRender
23
        PoseRenderer{poseModel},
24
        spPoseExtractorNet{poseExtractorNet},
25 26 27 28
        pGpuPose{nullptr},
        pMaxPtr{nullptr},
        pMinPtr{nullptr},
        pScalePtr{nullptr}
29 30 31 32 33 34 35
    {
    }

    // Destructor. Releases the CUDA buffers owned by this renderer (if any) and resets the pointers.
    // Exceptions are caught and reported through errorDestructor() so that they do not leave the
    // destructor.
    PoseGpuRenderer::~PoseGpuRenderer()
    {
        try
        {
            log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
            #ifdef USE_CUDA
                // Report any CUDA error that was already pending before the clean-up starts
                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
                // Free CUDA pointers - pointers that were never allocated (nullptr) are skipped
                if (pGpuPose != nullptr)
                {
                    cudaFree(pGpuPose);
                    pGpuPose = nullptr;
                }
                if (pMaxPtr != nullptr)
                {
                    cudaFree(pMaxPtr);
                    pMaxPtr = nullptr;
                }
                if (pMinPtr != nullptr)
                {
                    cudaFree(pMinPtr);
                    pMinPtr = nullptr;
                }
                if (pScalePtr != nullptr)
                {
                    cudaFree(pScalePtr);
                    pScalePtr = nullptr;
                }
                // Report any error raised by the cudaFree calls above
                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            #endif
            log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
        }
        catch (const std::exception& e)
        {
            errorDestructor(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    // Allocates the GPU buffers used for keypoint rendering. Sizes are fixed by POSE_MAX_PEOPLE:
    // - pGpuPose:  POSE_MAX_PEOPLE * #body parts * 3 floats.
    // - pMaxPtr:   2 * POSE_MAX_PEOPLE floats.
    // - pMinPtr:   2 * POSE_MAX_PEOPLE floats.
    // - pScalePtr: POSE_MAX_PEOPLE floats.
    // Without USE_CUDA this function only logs.
    void PoseGpuRenderer::initializationOnThread()
    {
        try
        {
            log("Starting initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
            // GPU memory allocation for rendering
            #ifdef USE_CUDA
                // Each buffer is allocated only if it does not exist yet, so that calling this function
                // more than once does not leak the previous allocation (the sizes never change).
                if (pGpuPose == nullptr)
                    cudaMalloc((void**)(&pGpuPose),
                        POSE_MAX_PEOPLE * getPoseNumberBodyParts(mPoseModel) * 3 * sizeof(float));
                if (pMaxPtr == nullptr)
                    cudaMalloc((void**)&pMaxPtr, sizeof(float) * 2 * POSE_MAX_PEOPLE);
                if (pMinPtr == nullptr)
                    cudaMalloc((void**)&pMinPtr, sizeof(float) * 2 * POSE_MAX_PEOPLE);
                if (pScalePtr == nullptr)
                    cudaMalloc((void**)&pScalePtr, sizeof(float) * POSE_MAX_PEOPLE);
                // Single check for all the allocations above
                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            #endif
            log("Finished initialization on thread.", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    std::pair<int, std::string> PoseGpuRenderer::renderPose(
G
Gines Hidalgo 已提交
93 94
        Array<float>& outputData, const Array<float>& poseKeypoints, const float scaleInputToOutput,
        const float scaleNetToOutput)
95 96 97
    {
        try
        {
98
            // Sanity check
99 100 101 102 103
            if (outputData.empty())
                error("Empty Array<float> outputData.", __LINE__, __FUNCTION__, __FILE__);
            // GPU rendering
            const auto elementRendered = spElementToRender->load();
            std::string elementRenderedName;
104
            #ifdef USE_CUDA
105 106 107 108
                const auto numberPeople = poseKeypoints.getSize(0);
                if (numberPeople > 0 || elementRendered != 0 || !mBlendOriginalFrame)
                {
                    cpuToGpuMemoryIfNotCopiedYet(outputData.getPtr(), outputData.getVolume());
109
                    const auto numberBodyParts = getPoseNumberBodyParts(mPoseModel);
110
                    const auto hasBkg = addBkgChannel(mPoseModel);
111
                    const auto numberBodyPartsPlusBkg = numberBodyParts + (hasBkg ? 1 : 0);
G
gineshidalgo99 已提交
112
                    const auto numberBodyPAFChannels = getPosePartPairs(mPoseModel).size();
113
                    const Point<int> frameSize{outputData.getSize(1), outputData.getSize(0)};
114 115 116
                    // Draw poseKeypoints
                    if (elementRendered == 0)
                    {
117 118 119 120
                        // Rescale keypoints to output size
                        auto poseKeypointsRescaled = poseKeypoints.clone();
                        scaleKeypoints(poseKeypointsRescaled, scaleInputToOutput);
                        // Render keypoints
121
                        if (!poseKeypoints.empty())
122 123 124
                            cudaMemcpy(
                                pGpuPose, poseKeypointsRescaled.getConstPtr(),
                                numberPeople * numberBodyParts * 3 * sizeof(float), cudaMemcpyHostToDevice);
125 126 127
                        renderPoseKeypointsGpu(
                            *spGpuMemory, pMaxPtr, pMinPtr, pScalePtr, mPoseModel, numberPeople, frameSize, pGpuPose,
                            mRenderThreshold, mShowGooglyEyes, mBlendOriginalFrame, getAlphaKeypoint());
128 129 130
                    }
                    else
                    {
131
                        // If resized to input resolution: Replace scaleNetToOutput * scaleInputToOutput by
132 133
                        // scaleInputToOutput, and comment the sanity check.
                        // Sanity check
134 135
                        if (scaleNetToOutput == -1.f)
                            error("Non valid scaleNetToOutput.", __LINE__, __FUNCTION__, __FILE__);
136
                        // Parameters
137
                        const auto& heatMapSizes = spPoseExtractorNet->getHeatMapSize();
138
                        const Point<int> heatMapSize{heatMapSizes[3], heatMapSizes[2]};
G
gineshidalgo99 已提交
139
                        const auto lastPAFChannel = numberBodyPartsPlusBkg+2+numberBodyPAFChannels/2;
140 141 142
                        // Add all heatmaps
                        if (elementRendered == 2)
                        // if (elementRendered == numberBodyPartsPlusBkg+1)
143 144
                        {
                            elementRenderedName = "Heatmaps";
G
Gines Hidalgo 已提交
145 146 147 148
                            renderPoseHeatMapsGpu(
                                *spGpuMemory, mPoseModel, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(),
                                heatMapSize, scaleNetToOutput * scaleInputToOutput,
                                (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f));
149 150
                        }
                        // Draw PAFs (Part Affinity Fields)
151 152
                        else if (elementRendered == 3)
                        // else if (elementRendered == numberBodyPartsPlusBkg+2)
153 154
                        {
                            elementRenderedName = "PAFs (Part Affinity Fields)";
G
Gines Hidalgo 已提交
155 156 157 158
                            renderPosePAFsGpu(
                                *spGpuMemory, mPoseModel, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(),
                                heatMapSize, scaleNetToOutput * scaleInputToOutput,
                                (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f));
159 160 161 162 163
                        }
                        // Draw specific body part or background
                        else if (elementRendered <= numberBodyPartsPlusBkg+2)
                        {
                            const auto realElementRendered = (elementRendered == 1
164 165
                                                                ? (hasBkg ? numberBodyParts : 0)
                                                                : elementRendered - 3 - (hasBkg ? 1:0));
G
gineshidalgo99 已提交
166
                            elementRenderedName = mPartIndexToName.at(realElementRendered);
G
Gines Hidalgo 已提交
167 168 169 170
                            renderPoseHeatMapGpu(
                                *spGpuMemory, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(), heatMapSize,
                                scaleNetToOutput * scaleInputToOutput, realElementRendered,
                                (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f));
171 172
                        }
                        // Draw affinity between 2 body parts
G
gineshidalgo99 已提交
173
                        else if (elementRendered <= lastPAFChannel)
174 175
                        {
                            const auto affinityPart = (elementRendered-numberBodyPartsPlusBkg-3)*2;
G
gineshidalgo99 已提交
176 177
                            const auto affinityPartMapped = numberBodyPartsPlusBkg
                                                          + getPoseMapIndex(mPoseModel).at(affinityPart);
178 179
                            elementRenderedName = mPartIndexToName.at(affinityPartMapped);
                            elementRenderedName = elementRenderedName.substr(0, elementRenderedName.find("("));
G
Gines Hidalgo 已提交
180 181 182 183
                            renderPosePAFGpu(
                                *spGpuMemory, mPoseModel, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(),
                                heatMapSize, scaleNetToOutput * scaleInputToOutput, affinityPartMapped,
                                (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f));
184
                        }
G
gineshidalgo99 已提交
185 186 187 188 189 190
                        // Draw neck-part distance channel
                        else
                        {
                            if (mPoseModel != PoseModel::BODY_25D)
                                error("Neck-part distance channel only for BODY_25D.",
                                      __LINE__, __FUNCTION__, __FILE__);
G
gineshidalgo99 已提交
191
                            const auto distancePart = (elementRendered - lastPAFChannel - 1);
G
Gines Hidalgo 已提交
192 193
                            const auto distancePartMapped = (unsigned int)(
                                numberBodyPartsPlusBkg + numberBodyPAFChannels + distancePart);
G
gineshidalgo99 已提交
194
                            elementRenderedName = mPartIndexToName.at(distancePartMapped);
G
Gines Hidalgo 已提交
195 196 197 198
                            renderPoseDistanceGpu(
                                *spGpuMemory, frameSize, spPoseExtractorNet->getHeatMapGpuConstPtr(), heatMapSize,
                                scaleNetToOutput * scaleInputToOutput, distancePartMapped,
                                (mBlendOriginalFrame ? getAlphaHeatMap() : 1.f));
G
gineshidalgo99 已提交
199
                        }
200 201 202 203 204 205 206 207
                    }
                }
                // GPU memory to CPU if last renderer
                gpuToCpuMemoryIfLastRenderer(outputData.getPtr(), outputData.getVolume());
                cudaCheck(__LINE__, __FUNCTION__, __FILE__);
            #else
                UNUSED(outputData);
                UNUSED(poseKeypoints);
G
gineshidalgo99 已提交
208
                UNUSED(scaleInputToOutput);
209
                UNUSED(scaleNetToOutput);
210
                error("OpenPose must be compiled with the `USE_CUDA` macro definitions in order to run this"
211 212
                      " functionality. You can alternatively use CPU rendering (flag `--render_pose 1`).",
                      __LINE__, __FUNCTION__, __FILE__);
213 214 215 216 217 218 219 220 221 222 223
            #endif
            // Return result
            return std::make_pair(elementRendered, elementRenderedName);
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
            return std::make_pair(-1, "");
        }
    }
}