提交 c047d13b 编写于 作者: G Gines

Fixed CUDA 7 compatibility

上级 635dc580
...@@ -19,7 +19,10 @@ namespace op ...@@ -19,7 +19,10 @@ namespace op
// __global__ void fillTargetPtrChannel(T* targetPtrOffsetted, const T* sourcePtrOffsetted, const int width, const int imageOffset) // __global__ void fillTargetPtrChannel(T* targetPtrOffsetted, const T* sourcePtrOffsetted, const int width, const int imageOffset)
// { // {
// const auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted); // const auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted);
- // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
+ // // Ideal option (not working for CUDA < 8)
+ // // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
+ // // Workaround to make it work for CUDA 7.5
+ // const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset);
// const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr); // const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr);
// targetPtrOffsetted[0] = sourceIndex % width; // targetPtrOffsetted[0] = sourceIndex % width;
// targetPtrOffsetted[1] = sourceIndex / width; // targetPtrOffsetted[1] = sourceIndex / width;
...@@ -40,6 +43,10 @@ namespace op ...@@ -40,6 +43,10 @@ namespace op
// const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset; // const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset;
// auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted); // auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted);
// const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset); // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
// // Ideal option (not working for CUDA < 8)
// // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
// // Workaround to make it work for CUDA 7.5
// const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset);
// const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr); // const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr);
// targetPtrOffsetted[0] = sourceIndex % width; // targetPtrOffsetted[0] = sourceIndex % width;
// targetPtrOffsetted[1] = sourceIndex / width; // targetPtrOffsetted[1] = sourceIndex / width;
...@@ -81,7 +88,10 @@ namespace op ...@@ -81,7 +88,10 @@ namespace op
const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset; const auto* const sourcePtrOffsetted = sourcePtr + (offsetChannel + part) * imageOffset;
// Option a - 6.3 fps // Option a - 6.3 fps
const auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted); const auto sourceThrustPtr = thrust::device_pointer_cast(sourcePtrOffsetted);
- const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
+ // Ideal option (not working for CUDA < 8)
+ // const auto sourceIndexIterator = thrust::max_element(thrust::device, sourceThrustPtr, sourceThrustPtr + imageOffset);
+ // Workaround to make it work for CUDA 7.5
+ const auto sourceIndexIterator = thrust::max_element(sourceThrustPtr, sourceThrustPtr + imageOffset);
const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr); const auto sourceIndex = (int)(sourceIndexIterator - sourceThrustPtr);
fillTargetPtrPart<<<1, 1>>>(targetPtrOffsetted, sourcePtrOffsetted, sourceIndex, sourceIndex % width, sourceIndex / width); fillTargetPtrPart<<<1, 1>>>(targetPtrOffsetted, sourcePtrOffsetted, sourceIndex, sourceIndex % width, sourceIndex / width);
// // Option b - <1 fps // // Option b - <1 fps
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册