#ifdef USE_CAFFE
    #include <caffe/blob.hpp>
#endif
#ifdef USE_CUDA
    #include <openpose/gpu/cuda.hpp>
#endif
#include <openpose/net/bodyPartConnectorBase.hpp>
#include <openpose/pose/poseParameters.hpp>
#include <openpose/net/bodyPartConnectorCaffe.hpp>

namespace op
{
    template <typename T>
14
    BodyPartConnectorCaffe<T>::BodyPartConnectorCaffe() :
G
gineshidalgo99 已提交
15 16
        mPoseModel{PoseModel::Size},
        mMaximizePositives{false},
17 18 19
        pBodyPartPairsGpuPtr{nullptr},
        pMapIdxGpuPtr{nullptr},
        pFinalOutputGpuPtr{nullptr}
G
gineshidalgo99 已提交
20
    {
21 22 23 24 25 26 27 28 29 30 31
        try
        {
            #ifndef USE_CAFFE
                error("OpenPose must be compiled with the `USE_CAFFE` macro definition in order to use this"
                      " functionality.", __LINE__, __FUNCTION__, __FILE__);
            #endif
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
G
gineshidalgo99 已提交
32 33
    }

34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
    template <typename T>
    BodyPartConnectorCaffe<T>::~BodyPartConnectorCaffe()
    {
        try
        {
            #if defined USE_CAFFE && defined USE_CUDA
                cudaFree(pBodyPartPairsGpuPtr);
                cudaFree(pMapIdxGpuPtr);
                cudaFree(pFinalOutputGpuPtr);
            #endif
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

G
gineshidalgo99 已提交
51
    template <typename T>
52
    void BodyPartConnectorCaffe<T>::Reshape(const std::vector<caffe::Blob<T>*>& bottom)
G
gineshidalgo99 已提交
53 54 55
    {
        try
        {
56 57 58 59 60 61 62 63 64
            #ifdef USE_CAFFE
                auto heatMapsBlob = bottom.at(0);
                auto peaksBlob = bottom.at(1);

                // Top shape
                const auto maxPeaks = peaksBlob->shape(2) - 1;
                const auto numberBodyParts = peaksBlob->shape(1);

                // Array sizes
65
                mTopSize = std::array<int, 4>{1, maxPeaks, numberBodyParts, 3};
G
gineshidalgo99 已提交
66 67 68 69
                mHeatMapsSize = std::array<int, 4>{
                    heatMapsBlob->shape(0), heatMapsBlob->shape(1), heatMapsBlob->shape(2), heatMapsBlob->shape(3)};
                mPeaksSize = std::array<int, 4>{
                    peaksBlob->shape(0), peaksBlob->shape(1), peaksBlob->shape(2), peaksBlob->shape(3)};
70 71 72
            #else
                UNUSED(bottom);
            #endif
G
gineshidalgo99 已提交
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    // Select the pose model whose body-part pairs and map indices drive the connector.
    template <typename T>
    void BodyPartConnectorCaffe<T>::setPoseModel(const PoseModel poseModel)
    {
        try
        {
            mPoseModel = poseModel;
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

G
gineshidalgo99 已提交
93 94 95 96 97 98 99 100 101 102 103 104 105
    template <typename T>
    void BodyPartConnectorCaffe<T>::setMaximizePositives(const bool maximizePositives)
    {
        try
        {
            mMaximizePositives = {maximizePositives};
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

G
gineshidalgo99 已提交
106
    template <typename T>
107
    void BodyPartConnectorCaffe<T>::setInterMinAboveThreshold(const T interMinAboveThreshold)
G
gineshidalgo99 已提交
108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
    {
        try
        {
            mInterMinAboveThreshold = {interMinAboveThreshold};
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    // Set the per-sample score threshold used when integrating along a candidate limb.
    template <typename T>
    void BodyPartConnectorCaffe<T>::setInterThreshold(const T interThreshold)
    {
        try
        {
            mInterThreshold = interThreshold;
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    // Set the minimum number of body parts a candidate person must have to be kept.
    template <typename T>
    void BodyPartConnectorCaffe<T>::setMinSubsetCnt(const int minSubsetCnt)
    {
        try
        {
            mMinSubsetCnt = minSubsetCnt;
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    // Set the minimum average score a candidate person must reach to be kept.
    template <typename T>
    void BodyPartConnectorCaffe<T>::setMinSubsetScore(const T minSubsetScore)
    {
        try
        {
            mMinSubsetScore = minSubsetScore;
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    // Set the factor that rescales net-resolution keypoints to the output resolution.
    template <typename T>
    void BodyPartConnectorCaffe<T>::setScaleNetToOutput(const T scaleNetToOutput)
    {
        try
        {
            mScaleNetToOutput = scaleNetToOutput;
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

G
gineshidalgo99 已提交
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
    template <typename T>
    void BodyPartConnectorCaffe<T>::Forward(const std::vector<caffe::Blob<T>*>& bottom, Array<T>& poseKeypoints,
                                            Array<T>& poseScores)
    {
        try
        {
            // CUDA
            #ifdef USE_CUDA
                Forward_gpu(bottom, poseKeypoints, poseScores);
            // OpenCL or CPU
            #else
                Forward_cpu(bottom, poseKeypoints, poseScores);
            #endif
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

G
gineshidalgo99 已提交
191
    template <typename T>
192 193
    void BodyPartConnectorCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, Array<T>& poseKeypoints,
                                                Array<T>& poseScores)
G
gineshidalgo99 已提交
194 195 196
    {
        try
        {
197 198
            #ifdef USE_CAFFE
                const auto heatMapsBlob = bottom.at(0);
199
                const auto* const heatMapsPtr = heatMapsBlob->cpu_data();                 // ~8.5 ms COCO, ~27ms BODY_65
200
                const auto* const peaksPtr = bottom.at(1)->cpu_data();                    // ~0.02ms
201
                const auto maxPeaks = mTopSize[1];
202
                connectBodyPartsCpu(poseKeypoints, poseScores, heatMapsPtr, peaksPtr, mPoseModel,
203 204
                                    Point<int>{heatMapsBlob->shape(3), heatMapsBlob->shape(2)},
                                    maxPeaks, mInterMinAboveThreshold, mInterThreshold,
G
gineshidalgo99 已提交
205
                                    mMinSubsetCnt, mMinSubsetScore, mScaleNetToOutput, mMaximizePositives);
206 207 208 209
            #else
                UNUSED(bottom);
                UNUSED(poseKeypoints);
            #endif
G
gineshidalgo99 已提交
210 211 212 213 214 215 216 217
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    template <typename T>
G
gineshidalgo99 已提交
218 219
    void BodyPartConnectorCaffe<T>::Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, Array<T>& poseKeypoints,
                                                Array<T>& poseScores)
G
gineshidalgo99 已提交
220 221 222
    {
        try
        {
223
            #if defined USE_CAFFE && defined USE_CUDA
224
                // Global data
225
                const auto heatMapsBlob = bottom.at(0);
G
gineshidalgo99 已提交
226
                const auto* const heatMapsGpuPtr = heatMapsBlob->gpu_data();
227
                const auto* const peaksPtr = bottom.at(1)->cpu_data();
228
                const auto maxPeaks = mTopSize[1];
229 230 231 232 233 234 235 236 237 238 239 240 241
                const auto* const peaksGpuPtr = bottom.at(1)->gpu_data();

                // Initialize fixed pointers (1-time task) - It must be done in the same thread than Forward_gpu
                if (pBodyPartPairsGpuPtr == nullptr || pMapIdxGpuPtr == nullptr)
                {
                    // Free previous memory
                    cudaFree(pBodyPartPairsGpuPtr);
                    cudaFree(pMapIdxGpuPtr);
                    // Data
                    const auto& bodyPartPairs = getPosePartPairs(mPoseModel);
                    const auto numberBodyParts = getPoseNumberBodyParts(mPoseModel);
                    const auto& mapIdxOffset = getPoseMapIndex(mPoseModel);
                    // Update mapIdx
242
                    const auto offset = (addBkgChannel(mPoseModel) ? 1 : 0);
243 244
                    auto mapIdx = mapIdxOffset;
                    for (auto& i : mapIdx)
245
                        i += (numberBodyParts+offset);
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
                    // Re-allocate memory
                    cudaMalloc((void **)&pBodyPartPairsGpuPtr, bodyPartPairs.size() * sizeof(unsigned int));
                    cudaMemcpy(pBodyPartPairsGpuPtr, &bodyPartPairs[0], bodyPartPairs.size() * sizeof(unsigned int),
                               cudaMemcpyHostToDevice);
                    cudaMalloc((void **)&pMapIdxGpuPtr, mapIdx.size() * sizeof(unsigned int));
                    cudaMemcpy(pMapIdxGpuPtr, &mapIdx[0], mapIdx.size() * sizeof(unsigned int),
                               cudaMemcpyHostToDevice);
                    // Sanity check
                    cudaCheck(__LINE__, __FUNCTION__, __FILE__);
                }
                // Initialize auxiliary pointers (1-time task)
                if (mFinalOutputCpu.empty()) // if (pFinalOutputGpuPtr == nullptr)
                {
                    // Data
                    const auto& bodyPartPairs = getPosePartPairs(mPoseModel);
                    const auto numberBodyPartPairs = bodyPartPairs.size() / 2;
                    // Allocate memory
                    mFinalOutputCpu.reset({(int)numberBodyPartPairs, maxPeaks, maxPeaks});
                    const auto totalComputations = mFinalOutputCpu.getVolume();
                    if (pFinalOutputGpuPtr == nullptr)
                        cudaMalloc((void **)&pFinalOutputGpuPtr, totalComputations * sizeof(float));
                    // Sanity check
                    cudaCheck(__LINE__, __FUNCTION__, __FILE__);
                }

                // Run body part connector
R
Raaj 已提交
272
                connectBodyPartsGpu(poseKeypoints, poseScores, heatMapsGpuPtr, peaksPtr, mPoseModel,
G
gineshidalgo99 已提交
273 274
                                    Point<int>{heatMapsBlob->shape(3), heatMapsBlob->shape(2)},
                                    maxPeaks, mInterMinAboveThreshold, mInterThreshold,
G
gineshidalgo99 已提交
275 276 277
                                    mMinSubsetCnt, mMinSubsetScore, mScaleNetToOutput, mMaximizePositives,
                                    mFinalOutputCpu, pFinalOutputGpuPtr, pBodyPartPairsGpuPtr, pMapIdxGpuPtr,
                                    peaksGpuPtr);
278 279 280
            #else
                UNUSED(bottom);
                UNUSED(poseKeypoints);
281
                UNUSED(poseScores);
282 283 284
                error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
                      " this functionality.", __LINE__, __FUNCTION__, __FILE__);
            #endif
G
gineshidalgo99 已提交
285 286 287 288 289 290 291 292
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    template <typename T>
293 294 295
    void BodyPartConnectorCaffe<T>::Backward_cpu(const std::vector<caffe::Blob<T>*>& top,
                                                 const std::vector<bool>& propagate_down,
                                                 const std::vector<caffe::Blob<T>*>& bottom)
G
gineshidalgo99 已提交
296 297 298 299 300 301
    {
        try
        {
            UNUSED(top);
            UNUSED(propagate_down);
            UNUSED(bottom);
302 303 304
            #ifdef USE_CAFFE
                NOT_IMPLEMENTED;
            #endif
G
gineshidalgo99 已提交
305 306 307 308 309 310 311 312
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    template <typename T>
313 314 315
    void BodyPartConnectorCaffe<T>::Backward_gpu(const std::vector<caffe::Blob<T>*>& top,
                                                 const std::vector<bool>& propagate_down,
                                                 const std::vector<caffe::Blob<T>*>& bottom)
G
gineshidalgo99 已提交
316 317 318 319 320 321
    {
        try
        {
            UNUSED(top);
            UNUSED(propagate_down);
            UNUSED(bottom);
322 323 324
            #ifdef USE_CAFFE
                NOT_IMPLEMENTED;
            #endif
G
gineshidalgo99 已提交
325 326 327 328 329 330 331
        }
        catch (const std::exception& e)
        {
            error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        }
    }

    COMPILE_TEMPLATE_FLOATING_TYPES_CLASS(BodyPartConnectorCaffe);
}