From c047d13b9685592f685728af7eb2c4a50f3d7a39 Mon Sep 17 00:00:00 2001 From: Gines Date: Mon, 10 Jul 2017 16:59:22 -0400 Subject: [PATCH] Fixed CUDA 7 compatibility --- src/openpose/core/maximumBase.cu | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/openpose/core/maximumBase.cu b/src/openpose/core/maximumBase.cu index ba2d5491..939da1f0 100644 --- a/src/openpose/core/maximumBase.cu +++ b/src/openpose/core/maximumBase.cu @@ -19,7 +19,10 @@ namespace op // __global__ void fillTargetPtrChannel(T* targetPtrOffsetted, const T* sourcePtrOffsetted, const int width, const int imageOffset) // { // const auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted); - // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset); + // // Ideal option (not working for CUDA < 8) + // // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset); + // // Workaround to make it work for CUDA 7.5 + // const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset); // const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr); // targetPtrOffsetted[0] = sourceIndex % width; // targetPtrOffsetted[1] = sourceIndex / width; @@ -40,6 +43,10 @@ namespace op // const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset; // auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted); // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset); + // // Ideal option (not working for CUDA < 8) + // // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset); + // // Workaround to make it work for CUDA 7.5 + // const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset); // const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr); // targetPtrOffsetted[0] = sourceIndex % width; // targetPtrOffsetted[1] = sourceIndex / width; @@ -81,7 +88,10 @@ namespace op const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset; // Option a - 6.3 fps const auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted); - const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset); + // Ideal option (not working for CUDA < 8) + // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset); + // Workaround to make it work for CUDA 7.5 + const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset); const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr); fillTargetPtrPart<<<1, 1>>>(targetPtrOffsetted, sourcePtrOffsetted, sourceIndex, sourceIndex % width, sourceIndex / width); // // Option b - <1 fps -- GitLab