提交 c4369cba 编写于 作者: G Gines Hidalgo

Example 09_keypoints_from_heatmaps working for CUDA again

上级 3c9441ae
...@@ -370,13 +370,14 @@ OpenPose Library - Release Notes ...@@ -370,13 +370,14 @@ OpenPose Library - Release Notes
1. Highly improved 3D triangulation for >3 cameras by fixing some small bugs. 1. Highly improved 3D triangulation for >3 cameras by fixing some small bugs.
2. Added community-based support for Nvidia NVCaffe. 2. Added community-based support for Nvidia NVCaffe.
3. Increased accuracy very slightly for CUDA version (about 0.01%) by adapting the threshold in `process()` in `bodyPartConnectorBase.cu` to `defaultNmsThreshold`. This also removes any possibility of future bugs in that function for using a default NMS threshold higher than 0.15 (which was the hard-coded value used previously). 3. Increased accuracy very slightly for CUDA version (about 0.01%) by adapting the threshold in `process()` in `bodyPartConnectorBase.cu` to `defaultNmsThreshold`. This also removes any possibility of future bugs in that function for using a default NMS threshold higher than 0.15 (which was the hard-coded value used previously).
4. Increased mAP but reduced mAR (both about 0.01%) as well as reduction of false positives. Step 1: removed legs where only knee/ankle/feet are found. Step 2: If no people is found in an image, `removePeopleBelowThresholds` is re-run with `maximizePositives = true`. 4. Increased mAP but reduced mAR (both about 0.01%) as well as reduction of false positives. Step 1: removed legs where only knee/ankle/feet are found. Step 2: If no people is found in an image, `removePeopleBelowThresholdsAndFillFaces` is re-run with `maximizePositives = true`.
5. Number of maximum people is not limited by the maximum number of max peaks anymore. However, the number of body part candidates for a specific keypoint (e.g., nose) is still limited to the number of max peaks. 5. Number of maximum people is not limited by the maximum number of max peaks anymore. However, the number of body part candidates for a specific keypoint (e.g., nose) is still limited to the number of max peaks.
6. Added more checks during destructors of CUDA-related functions and safer CUDA frees.
2. Functions or parameters renamed: 2. Functions or parameters renamed:
1. `--3d_min_views` default value (-1) no longer means that all camera views are required. Instead, it will be equal to max(2, min(4, #cameras-1)). This should provide a good trade-off between recall and precision. 1. `--3d_min_views` default value (-1) no longer means that all camera views are required. Instead, it will be equal to max(2, min(4, #cameras-1)). This should provide a good trade-off between recall and precision.
3. Main bugs fixed: 3. Main bugs fixed:
1. Windows: Added back support for OpenGL and Spinnaker, as well as DLLs for debug compilation. 1. Windows: Added back support for OpenGL and Spinnaker, as well as DLLs for debug compilation.
2. `06_face_from_image.cpp` and `07_hand_from_image.cpp` working again, they stopped working in version 1.5.0 with the GPU image resize for the GUI. 2. `06_face_from_image.cpp`, `07_hand_from_image.cpp`, and `09_keypoints_from_heatmaps` working again, they stopped working in version 1.5.0 with the GPU image resize for the GUI.
4. Changes/additions that affect the compatibility with the OpenPose Unity Plugin: 4. Changes/additions that affect the compatibility with the OpenPose Unity Plugin:
......
...@@ -34,9 +34,23 @@ namespace op ...@@ -34,9 +34,23 @@ namespace op
if (mGpuResize) if (mGpuResize)
{ {
// Free temporary memory // Free temporary memory
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pInputImageCuda != nullptr)
{
cudaFree(pInputImageCuda); cudaFree(pInputImageCuda);
pInputImageCuda = nullptr;
}
if (pOutputImageCuda != nullptr)
{
cudaFree(pOutputImageCuda); cudaFree(pOutputImageCuda);
pOutputImageCuda = nullptr;
}
if (pInputImageReorderedCuda != nullptr)
{
cudaFree(pInputImageReorderedCuda); cudaFree(pInputImageReorderedCuda);
pInputImageReorderedCuda = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
} }
#endif #endif
} }
......
...@@ -38,8 +38,18 @@ namespace op ...@@ -38,8 +38,18 @@ namespace op
if (mGpuResize) if (mGpuResize)
{ {
// Free temporary memory // Free temporary memory
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pInputImageCuda != nullptr)
{
cudaFree(pInputImageCuda); cudaFree(pInputImageCuda);
pInputImageCuda = nullptr;
}
if (*spOutputImageCuda != nullptr)
{
cudaFree(*spOutputImageCuda); cudaFree(*spOutputImageCuda);
*spOutputImageCuda = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
} }
#endif #endif
} }
......
...@@ -46,7 +46,10 @@ namespace op ...@@ -46,7 +46,10 @@ namespace op
{ {
#ifdef USE_CUDA #ifdef USE_CUDA
if (mIsLastRenderer && spGpuMemory != nullptr) if (mIsLastRenderer && spGpuMemory != nullptr)
{
cudaFree(*spGpuMemory); cudaFree(*spGpuMemory);
*spGpuMemory = nullptr;
}
#endif #endif
} }
catch (const std::exception& e) catch (const std::exception& e)
......
...@@ -37,8 +37,18 @@ namespace op ...@@ -37,8 +37,18 @@ namespace op
if (mGpuResize) if (mGpuResize)
{ {
// Free temporary memory // Free temporary memory
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (*spOutputImageFloatCuda != nullptr)
{
cudaFree(*spOutputImageFloatCuda); cudaFree(*spOutputImageFloatCuda);
*spOutputImageFloatCuda = nullptr;
}
if (pOutputImageUCharCuda != nullptr)
{
cudaFree(pOutputImageUCharCuda); cudaFree(pOutputImageUCharCuda);
pOutputImageUCharCuda = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
} }
#endif #endif
} }
......
...@@ -24,14 +24,28 @@ namespace op ...@@ -24,14 +24,28 @@ namespace op
{ {
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed. // Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
#ifdef USE_CUDA #ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pGpuFace != nullptr) if (pGpuFace != nullptr)
{
cudaFree(pGpuFace); cudaFree(pGpuFace);
pGpuFace = nullptr;
}
if (pMaxPtr != nullptr) if (pMaxPtr != nullptr)
{
cudaFree(pMaxPtr); cudaFree(pMaxPtr);
pMaxPtr = nullptr;
}
if (pMinPtr != nullptr) if (pMinPtr != nullptr)
{
cudaFree(pMinPtr); cudaFree(pMinPtr);
pMinPtr = nullptr;
}
if (pScalePtr != nullptr) if (pScalePtr != nullptr)
{
cudaFree(pScalePtr); cudaFree(pScalePtr);
pScalePtr = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif #endif
} }
catch (const std::exception& e) catch (const std::exception& e)
......
...@@ -24,14 +24,28 @@ namespace op ...@@ -24,14 +24,28 @@ namespace op
{ {
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed. // Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
#ifdef USE_CUDA #ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pGpuHand != nullptr) if (pGpuHand != nullptr)
{
cudaFree(pGpuHand); cudaFree(pGpuHand);
pGpuHand = nullptr;
}
if (pMaxPtr != nullptr) if (pMaxPtr != nullptr)
{
cudaFree(pMaxPtr); cudaFree(pMaxPtr);
pMaxPtr = nullptr;
}
if (pMinPtr != nullptr) if (pMinPtr != nullptr)
{
cudaFree(pMinPtr); cudaFree(pMinPtr);
pMinPtr = nullptr;
}
if (pScalePtr != nullptr) if (pScalePtr != nullptr)
{
cudaFree(pScalePtr); cudaFree(pScalePtr);
pScalePtr = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif #endif
} }
catch (const std::exception& e) catch (const std::exception& e)
......
...@@ -61,14 +61,14 @@ namespace op ...@@ -61,14 +61,14 @@ namespace op
template <typename T> template <typename T>
void getKeypointCounter( void getKeypointCounter(
int& personCounter, const std::vector<std::pair<std::vector<int>, T>>& peopleVector, int& personCounter, const std::vector<std::pair<std::vector<int>, T>>& peopleVector,
const unsigned int index, const int indexFirst, const int indexLast, const int minimum) const unsigned int part, const int partFirst, const int partLast, const int minimum)
{ {
try try
{ {
// Count keypoints // Count keypoints
auto keypointCounter = 0; auto keypointCounter = 0;
for (auto i = indexFirst ; i < indexLast ; i++) for (auto i = partFirst ; i < partLast ; i++)
keypointCounter += (peopleVector[index].first.at(i) > 0); keypointCounter += (peopleVector[part].first.at(i) > 0);
// If enough keypoints --> subtract them and keep them at least as big as minimum // If enough keypoints --> subtract them and keep them at least as big as minimum
if (keypointCounter > minimum) if (keypointCounter > minimum)
personCounter += minimum-keypointCounter; // personCounter = non-considered keypoints + minimum personCounter += minimum-keypointCounter; // personCounter = non-considered keypoints + minimum
...@@ -79,6 +79,53 @@ namespace op ...@@ -79,6 +79,53 @@ namespace op
} }
} }
// Computes the bounding box (ROI) of the detected keypoints of one person within the part range
// [partInit, partEnd), together with the first/last part of that range that has a positive score.
//
// Each entry personVector[part] is used as an index into peaksPtr: the score is read at that
// index and the x / y coordinates at the two preceding elements (index-2 and index-1).
//
// Outputs:
//   roi           - {x, y, width, height} of the box enclosing every keypoint with score > 0.
//                   Left as {0,0,0,0} if no keypoint in the range has a positive score.
//   diameter      - fastMax(roi.width, roi.height); 0 if no keypoint was found.
//   partFirstNon0 - first part in the range with score > 0, or -1 if none.
//   partLastNon0  - last part in the range with score > 0, or -1 if none.
// Inputs:
//   personVector  - per-part indexes into peaksPtr for this person.
//   peaksPtr      - flat array holding (x, y, score) triplets addressed as described above.
//   partInit      - first part to inspect (inclusive).
//   partEnd       - last part to inspect (exclusive).
//                   NOTE(review): the range is not checked against personVector.size() —
//                   callers are assumed to pass a valid range.
// Any std::exception is forwarded to error() with the current line/function/file.
template <typename T>
void getRoiDiameterAndBounds(
    Rectangle<int>& roi, int& diameter, int& partFirstNon0, int& partLastNon0,
    const std::vector<int>& personVector, const T* const peaksPtr,
    const int partInit, const int partEnd)
{
    try
    {
        // Defaults returned when no valid keypoint exists in the range
        roi = Rectangle<int>{0,0,0,0};
        diameter = 0;
        partFirstNon0 = -1;
        partLastNon0 = -1;
        // Opposite corners [p1, p2] of the box, seeded from the first valid keypoint. Seeding with
        // zeros would pin p1 to the origin for every non-negative coordinate.
        auto xMin = 0;
        auto yMin = 0;
        auto xMax = 0;
        auto yMax = 0;
        for (auto part = partInit ; part < partEnd ; part++)
        {
            const auto peakIndex = personVector[part];
            // x and y live at peakIndex-2 and peakIndex-1, so any index below 2 cannot be
            // dereferenced safely. getKeypointCounter only counts entries > 0 as present, so
            // this presumably also skips the parts that were not assigned to this person.
            if (peakIndex < 2)
                continue;
            const auto score = peaksPtr[peakIndex];
            if (score > 0)
            {
                // Same float-to-int truncation that the implicit assignment to roi performed
                const auto x = static_cast<int>(peaksPtr[peakIndex-2]);
                const auto y = static_cast<int>(peaksPtr[peakIndex-1]);
                // First keypoint? --> It defines the initial box
                if (partFirstNon0 < 0)
                {
                    partFirstNon0 = part;
                    xMin = x;
                    xMax = x;
                    yMin = y;
                    yMax = y;
                }
                // Otherwise --> Grow the box
                else
                {
                    if (xMin > x)
                        xMin = x;
                    if (yMin > y)
                        yMin = y;
                    if (xMax < x)
                        xMax = x;
                    // Fixed: the bottom edge must grow with larger y (was `>`)
                    if (yMax < y)
                        yMax = y;
                }
                // Last keypoint (so far)
                partLastNon0 = part;
            }
        }
        // From [p1, p2] to [p1, width, height]
        roi.x = xMin;
        roi.y = yMin;
        roi.width = xMax - xMin;
        roi.height = yMax - yMin;
        // diameter
        diameter = fastMax(roi.width, roi.height);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
template <typename T> template <typename T>
std::vector<std::pair<std::vector<int>, T>> createPeopleVector( std::vector<std::pair<std::vector<int>, T>> createPeopleVector(
const T* const heatMapPtr, const T* const peaksPtr, const PoseModel poseModel, const Point<int>& heatMapSize, const T* const heatMapPtr, const T* const peaksPtr, const PoseModel poseModel, const Point<int>& heatMapSize,
...@@ -643,32 +690,6 @@ namespace op ...@@ -643,32 +690,6 @@ namespace op
} }
} }
// Earlier version of getRoiDiameterAndBounds.
// Resets roi to {0,0,0,0} and then, for every entry of personVector except the last one, lowers
// roi.x / roi.y whenever the x / y value read from peaksPtr is smaller.
// Each personVector entry is used as an index into peaksPtr: score at that index, x and y at the
// two preceding elements.
// NOTE(review): diameter, indexFirstNon0, indexLastNon0, indexInit and indexEnd are never read or
// written in this body, and roi.width / roi.height stay at 0.
template <typename T>
void getRoiDiameterAndBounds(
Rectangle<int>& roi, int& diameter, int& indexFirstNon0, int& indexLastNon0,
const std::vector<int>& personVector, const T* const peaksPtr,
const int indexInit, const int indexEnd)
{
try
{
roi = Rectangle<int>{0,0,0,0};
// NOTE(review): size()-1 is evaluated in unsigned arithmetic, so an empty personVector would
// not produce an empty range — confirm callers never pass one.
for (auto index = 0u ; index < personVector.size()-1 ; index++)
{
const auto x = peaksPtr[personVector[index]-2];
const auto y = peaksPtr[personVector[index]-1];
// score is read but not used below
const auto score = peaksPtr[personVector[index]];
// Keep the smallest x / y seen so far (starting from 0)
if (roi.x > x)
roi.x = x;
if (roi.y > y)
roi.y = y;
}
}
catch (const std::exception& e)
{
// Forward the exception message together with the current location
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
template <typename T> template <typename T>
void removePeopleBelowThresholdsAndFillFaces( void removePeopleBelowThresholdsAndFillFaces(
std::vector<int>& validSubsetIndexes, int& numberPeople, std::vector<int>& validSubsetIndexes, int& numberPeople,
...@@ -693,28 +714,28 @@ namespace op ...@@ -693,28 +714,28 @@ namespace op
std::vector<int> faceInvalidSubsetIndexes; std::vector<int> faceInvalidSubsetIndexes;
faceInvalidSubsetIndexes.reserve(peopleVector.size()); faceInvalidSubsetIndexes.reserve(peopleVector.size());
// For each person candidate // For each person candidate
for (auto index = 0u ; index < peopleVector.size() ; index++) for (auto person = 0u ; person < peopleVector.size() ; person++)
{ {
auto personCounter = peopleVector[index].first.back(); auto personCounter = peopleVector[person].first.back();
// Analog for hand/face keypoints // Analog for hand/face keypoints
if (numberBodyParts >= 135) if (numberBodyParts >= 135)
{ {
// No consider face keypoints for personCounter // No consider face keypoints for personCounter
const auto currentCounter = personCounter; const auto currentCounter = personCounter;
getKeypointCounter(personCounter, peopleVector, index, 65, 135, 1); getKeypointCounter(personCounter, peopleVector, person, 65, 135, 1);
const auto newCounter = personCounter; const auto newCounter = personCounter;
if (personCounter == 0) if (personCounter == 1)
{ {
faceInvalidSubsetIndexes.emplace_back(index); faceInvalidSubsetIndexes.emplace_back(person);
continue; continue;
} }
// If body is still valid and facial points were removed, then add to valid faces // If body is still valid and facial points were removed, then add to valid faces
else if (currentCounter != newCounter) else if (currentCounter != newCounter)
faceValidSubsetIndexes.emplace_back(index); faceValidSubsetIndexes.emplace_back(person);
// No consider right hand keypoints for personCounter // No consider right hand keypoints for personCounter
getKeypointCounter(personCounter, peopleVector, index, 45, 65, 1); getKeypointCounter(personCounter, peopleVector, person, 45, 65, 1);
// No consider left hand keypoints for personCounter // No consider left hand keypoints for personCounter
getKeypointCounter(personCounter, peopleVector, index, 25, 45, 1); getKeypointCounter(personCounter, peopleVector, person, 25, 45, 1);
} }
// Foot keypoints do not affect personCounter (too many false positives, // Foot keypoints do not affect personCounter (too many false positives,
// same foot usually appears as both left and right keypoints) // same foot usually appears as both left and right keypoints)
...@@ -724,7 +745,7 @@ namespace op ...@@ -724,7 +745,7 @@ namespace op
if (!maximizePositives && (numberBodyParts == 25 || numberBodyParts > 70)) if (!maximizePositives && (numberBodyParts == 25 || numberBodyParts > 70))
{ {
const auto currentCounter = personCounter; const auto currentCounter = personCounter;
getKeypointCounter(personCounter, peopleVector, index, 19, 25, 0); getKeypointCounter(personCounter, peopleVector, person, 19, 25, 0);
const auto newCounter = personCounter; const auto newCounter = personCounter;
// Problem: Same leg/foot keypoints are considered for both left and right keypoints. // Problem: Same leg/foot keypoints are considered for both left and right keypoints.
// Solution: Remove legs that are duplicated and that do not have upper torso // Solution: Remove legs that are duplicated and that do not have upper torso
...@@ -733,11 +754,11 @@ namespace op ...@@ -733,11 +754,11 @@ namespace op
continue; continue;
} }
// Add only valid people // Add only valid people
const auto personScore = peopleVector[index].second; const auto personScore = peopleVector[person].second;
if (personCounter >= minSubsetCnt && (personScore/personCounter) >= minSubsetScore) if (personCounter >= minSubsetCnt && (personScore/personCounter) >= minSubsetScore)
{ {
numberPeople++; numberPeople++;
validSubsetIndexes.emplace_back(index); validSubsetIndexes.emplace_back(person);
// // This is not required, it is OK if there are more people. No more GPU memory used. // // This is not required, it is OK if there are more people. No more GPU memory used.
// if (numberPeople == maxPeaks) // if (numberPeople == maxPeaks)
// break; // break;
...@@ -747,25 +768,6 @@ namespace op ...@@ -747,25 +768,6 @@ namespace op
error("Bad personCounter (" + std::to_string(personCounter) + "). Bug in this" error("Bad personCounter (" + std::to_string(personCounter) + "). Bug in this"
" function if this happens.", __LINE__, __FUNCTION__, __FILE__); " function if this happens.", __LINE__, __FUNCTION__, __FILE__);
} }
// // Random standalone facial keypoints --> Merge into a more complete face
// if (numberPeople > 0 && faceInvalidSubsetIndexes.size() > 0)
// {
// for (auto faceId = 0u ; faceId < faceInvalidSubsetIndexes.size() ; faceId++)
// {
// // Get ROI
// Rectangle<int> roi;
// int diameter;
// int indexFirstNon0;
// int indexLastNon0;
// const auto index = faceValidSubsetIndexes[faceId];
// getRoiDiameterAndBounds(
// roi, diameter, indexFirstNon0, indexLastNon0, peopleVector[index].first, peaksPtr, 65, 135);
// // const auto personCounter = peopleVector[index].first.back();
// // const auto x = peaksPtr[peopleVector[index].first[part]-2];
// // const auto y = peaksPtr[peopleVector[index].first[part]-1];
// // const auto score = peaksPtr[peopleVector[index].first[part]];
// }
// }
// If no people found --> Repeat with maximizePositives = true // If no people found --> Repeat with maximizePositives = true
// Result: Increased COCO mAP because we catch more foot-only images // Result: Increased COCO mAP because we catch more foot-only images
if (numberPeople == 0 && !maximizePositives) if (numberPeople == 0 && !maximizePositives)
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#endif #endif
#ifdef USE_CUDA #ifdef USE_CUDA
#include <openpose/gpu/cuda.hpp> #include <openpose/gpu/cuda.hpp>
#include <openpose/gpu/cuda.hu>
#endif #endif
#ifdef USE_OPENCL #ifdef USE_OPENCL
#include <openpose/gpu/opencl.hcl> #include <openpose/gpu/opencl.hcl>
...@@ -41,14 +42,28 @@ namespace op ...@@ -41,14 +42,28 @@ namespace op
try try
{ {
#if defined USE_CAFFE && defined USE_CUDA #if defined USE_CAFFE && defined USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pBodyPartPairsGpuPtr != nullptr)
{
cudaFree(pBodyPartPairsGpuPtr); cudaFree(pBodyPartPairsGpuPtr);
pBodyPartPairsGpuPtr = nullptr;
}
if (pMapIdxGpuPtr != nullptr)
{
cudaFree(pMapIdxGpuPtr); cudaFree(pMapIdxGpuPtr);
pMapIdxGpuPtr = nullptr;
}
if (pFinalOutputGpuPtr != nullptr)
{
cudaFree(pFinalOutputGpuPtr); cudaFree(pFinalOutputGpuPtr);
pFinalOutputGpuPtr = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif #endif
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
error(e.what(), __LINE__, __FUNCTION__, __FILE__); errorDestructor(e.what(), __LINE__, __FUNCTION__, __FILE__);
} }
} }
......
...@@ -45,9 +45,9 @@ namespace op ...@@ -45,9 +45,9 @@ namespace op
#if defined USE_CAFFE && defined USE_OPENCL #if defined USE_CAFFE && defined USE_OPENCL
try try
{ {
if(mKernelGpuPtr != nullptr) if (mKernelGpuPtr != nullptr)
clReleaseMemObject((cl_mem)mKernelGpuPtr); clReleaseMemObject((cl_mem)mKernelGpuPtr);
if(mKernelCpuPtr != nullptr) if (mKernelCpuPtr != nullptr)
delete mKernelCpuPtr; delete mKernelCpuPtr;
} }
catch (const std::exception& e) catch (const std::exception& e)
......
...@@ -4,8 +4,18 @@ ...@@ -4,8 +4,18 @@
namespace op namespace op
{ {
const auto THREADS_PER_BLOCK = 256u;
const auto THREADS_PER_BLOCK_1D = 16u; const auto THREADS_PER_BLOCK_1D = 16u;
// Element-wise copy kernel: targetPtr[i] = sourcePtr[i] for every i in [0, N).
// Each thread handles at most one element, identified by its flat index along the x dimension
// (blockIdx.x * blockDim.x + threadIdx.x); threads whose index is not below N do nothing.
// The launch must therefore provide at least N threads along x to cover the whole array.
template <typename T>
__global__ void fillKernel(
    T* targetPtr, const T* const sourcePtr, const int N)
{
    // Flat index of this thread along x
    const auto globalIdx = threadIdx.x + (blockDim.x * blockIdx.x);
    // Surplus threads of the last block have nothing to copy
    if (globalIdx >= N)
        return;
    targetPtr[globalIdx] = sourcePtr[globalIdx];
}
// template <typename T> // template <typename T>
// __global__ void resizeKernelOld( // __global__ void resizeKernelOld(
// T* targetPtr, const T* const sourcePtr, const int widthSource, const int heightSource, const int widthTarget, // T* targetPtr, const T* const sourcePtr, const int widthSource, const int heightSource, const int widthTarget,
...@@ -329,6 +339,16 @@ namespace op ...@@ -329,6 +339,16 @@ namespace op
// Optimized function for 8x resize // Optimized function for 8x resize
// OP_CUDA_PROFILE_INIT(REPS); // OP_CUDA_PROFILE_INIT(REPS);
if (widthTarget / widthSource == 1 && heightTarget / heightSource == 1)
{
const auto N = widthTarget * heightTarget * num * channels;
const dim3 threadsPerBlock{THREADS_PER_BLOCK};
const dim3 numBlocks{getNumberCudaBlocks(N, threadsPerBlock.x)};
fillKernel<<<numBlocks, threadsPerBlock>>>(
targetPtr, sourcePtrs.at(0), N);
}
else
{
if (widthTarget / widthSource != 8 || heightTarget / heightSource != 8) if (widthTarget / widthSource != 8 || heightTarget / heightSource != 8)
error("Kernel only implemented for 8x resize. Notify us if this error appears.", error("Kernel only implemented for 8x resize. Notify us if this error appears.",
__LINE__, __FUNCTION__, __FILE__); __LINE__, __FUNCTION__, __FILE__);
...@@ -341,6 +361,7 @@ namespace op ...@@ -341,6 +361,7 @@ namespace op
resize8TimesKernel<<<numBlocks, threadsPerBlock>>>( resize8TimesKernel<<<numBlocks, threadsPerBlock>>>(
targetPtr, sourcePtrs.at(0), widthSource, heightSource, widthTarget, heightTarget, targetPtr, sourcePtrs.at(0), widthSource, heightSource, widthTarget, heightTarget,
rescaleFactor); rescaleFactor);
}
// OP_CUDA_PROFILE_END(timeNormalize3, 1e3, REPS); // OP_CUDA_PROFILE_END(timeNormalize3, 1e3, REPS);
// // Profiling code // // Profiling code
...@@ -482,9 +503,13 @@ namespace op ...@@ -482,9 +503,13 @@ namespace op
widthTarget, heightTarget, sourcePtrs[0], sourcePtrs[1], sourcePtrs[2], sourcePtrs[3], widthTarget, heightTarget, sourcePtrs[0], sourcePtrs[1], sourcePtrs[2], sourcePtrs[3],
sourcePtrs[4], sourcePtrs[5], sourcePtrs[6], sourcePtrs[7]); sourcePtrs[4], sourcePtrs[5], sourcePtrs[6], sourcePtrs[7]);
// Free memory // Free memory
if (widthSources != nullptr)
cudaFree(widthSources); cudaFree(widthSources);
if (heightSources != nullptr)
cudaFree(heightSources); cudaFree(heightSources);
if (scaleWidths != nullptr)
cudaFree(scaleWidths); cudaFree(scaleWidths);
if (scaleHeights != nullptr)
cudaFree(scaleHeights); cudaFree(scaleHeights);
// OP_CUDA_PROFILE_END(timeNormalize3, 1e3, REPS); // OP_CUDA_PROFILE_END(timeNormalize3, 1e3, REPS);
......
...@@ -34,16 +34,32 @@ namespace op ...@@ -34,16 +34,32 @@ namespace op
try try
{ {
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed. // Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
#ifdef USE_CUDA #ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
if (pGpuPose != nullptr) if (pGpuPose != nullptr)
{
cudaFree(pGpuPose); cudaFree(pGpuPose);
pGpuPose = nullptr;
}
if (pMaxPtr != nullptr) if (pMaxPtr != nullptr)
{
cudaFree(pMaxPtr); cudaFree(pMaxPtr);
pMaxPtr = nullptr;
}
if (pMinPtr != nullptr) if (pMinPtr != nullptr)
{
cudaFree(pMinPtr); cudaFree(pMinPtr);
pMinPtr = nullptr;
}
if (pScalePtr != nullptr) if (pScalePtr != nullptr)
{
cudaFree(pScalePtr); cudaFree(pScalePtr);
pScalePtr = nullptr;
}
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif #endif
log("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
} }
catch (const std::exception& e) catch (const std::exception& e)
{ {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册