提交 13a6d0b9 编写于 作者: A Alexey Spizhevoy

fixed parsing GPU archs in BIN(PTX) format

上级 d70d2edc
...@@ -712,24 +712,32 @@ if(WITH_CUDA) ...@@ -712,24 +712,32 @@ if(WITH_CUDA)
set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
# These variables are used in config templates # These variables are used in config templates
string(REGEX REPLACE "\\." "" ARCH_GPU_NO_POINTS "${CUDA_ARCH_BIN}") string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}")
string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}") string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}")
# Check if user specified 1.0 compute capability # Check if user specified 1.0 compute capability
string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}") string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}")
set(CUDA_ARCH_BIN_OR_PTX_10 0)
if(NOT ${HAS_ARCH_10} STREQUAL "") if(NOT ${HAS_ARCH_10} STREQUAL "")
set(OPENCV_ARCH_GPU_OR_PTX_10 1) set(CUDA_ARCH_BIN_OR_PTX_10 1)
endif() endif()
set(NVCC_FLAGS_EXTRA "") # Flags to be set
set(NVCC_FLAGS_EXTRA "")
# These variables are passed into the template
set(OPENCV_CUDA_ARCH_BIN "")
set(OPENCV_CUDA_ARCH_PTX "")
# Tell nvcc to add binaries for the specified GPUs # Tell nvcc to add binaries for the specified GPUs
string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_GPU_NO_POINTS}") string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}")
foreach(ARCH IN LISTS ARCH_LIST) foreach(ARCH IN LISTS ARCH_LIST)
if (ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)") if (ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)")
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}")
else() else()
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH}) set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH})
set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}")
endif() endif()
endforeach() endforeach()
...@@ -737,8 +745,10 @@ if(WITH_CUDA) ...@@ -737,8 +745,10 @@ if(WITH_CUDA)
string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}") string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}")
foreach(ARCH IN LISTS ARCH_LIST) foreach(ARCH IN LISTS ARCH_LIST)
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH}) set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH})
set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}")
endforeach() endforeach()
# Will be processed in other scripts
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA}) set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}") set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")
......
...@@ -167,13 +167,13 @@ ...@@ -167,13 +167,13 @@
#cmakedefine HAVE_CUDA #cmakedefine HAVE_CUDA
/* Compile for 'real' NVIDIA GPU architectures */ /* Compile for 'real' NVIDIA GPU architectures */
#define OPENCV_ARCH_GPU "${ARCH_GPU_NO_POINTS}" #define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}"
/* Compile for 'virtual' NVIDIA PTX architectures */ /* Compile for 'virtual' NVIDIA PTX architectures */
#define OPENCV_ARCH_PTX "${ARCH_PTX_NO_POINTS}" #define CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX}"
/* Create PTX or CUBIN for 1.0 compute capability */ /* Create PTX or BIN for 1.0 compute capability */
#cmakedefine OPENCV_ARCH_GPU_OR_PTX_10 #cmakedefine CUDA_ARCH_BIN_OR_PTX_10
/* VideoInput library */ /* VideoInput library */
#cmakedefine HAVE_VIDEOINPUT #cmakedefine HAVE_VIDEOINPUT
......
...@@ -164,33 +164,33 @@ namespace ...@@ -164,33 +164,33 @@ namespace
CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor)
{ {
return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, std::equal_to<int>()); return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to<int>());
} }
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor)
{ {
return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, return ::compare(CUDA_ARCH_PTX, major * 10 + minor,
std::less_equal<int>()); std::less_equal<int>());
} }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor)
{ {
return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, return ::compare(CUDA_ARCH_PTX, major * 10 + minor,
std::greater_equal<int>()); std::greater_equal<int>());
} }
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor)
{ {
return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, std::equal_to<int>()); return ::compare(CUDA_ARCH_BIN, major * 10 + minor, std::equal_to<int>());
} }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor)
{ {
return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, return ::compare(CUDA_ARCH_BIN, major * 10 + minor,
std::greater_equal<int>()); std::greater_equal<int>());
} }
......
...@@ -85,7 +85,7 @@ ...@@ -85,7 +85,7 @@
#error "Insufficient NPP version, please update it." #error "Insufficient NPP version, please update it."
#endif #endif
#if defined(OPENCV_ARCH_GPU_OR_PTX_10) #if defined(CUDA_ARCH_BIN_OR_PTX_10)
#error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0" #error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#endif #endif
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册