diff --git a/CMakeLists.txt b/CMakeLists.txt index 586f8eb5ec0acaddcb3449a28b7510c97345ee77..df8e9a216561175b4ca9888760d591366f728140 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -712,24 +712,32 @@ if(WITH_CUDA) set(CUDA_ARCH_PTX "1.1 1.3" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") # These variables are used in config templates - string(REGEX REPLACE "\\." "" ARCH_GPU_NO_POINTS "${CUDA_ARCH_BIN}") + string(REGEX REPLACE "\\." "" ARCH_BIN_NO_POINTS "${CUDA_ARCH_BIN}") string(REGEX REPLACE "\\." "" ARCH_PTX_NO_POINTS "${CUDA_ARCH_PTX}") # Ckeck if user specified 1.0 compute capability string(REGEX MATCH "1.0" HAS_ARCH_10 "${CUDA_ARCH_BIN} ${CUDA_ARCH_PTX}") + set(CUDA_ARCH_BIN_OR_PTX_10 0) if(NOT ${HAS_ARCH_10} STREQUAL "") - set(OPENCV_ARCH_GPU_OR_PTX_10 1) + set(CUDA_ARCH_BIN_OR_PTX_10 1) endif() - set(NVCC_FLAGS_EXTRA "") + # Flags to be set + set(NVCC_FLAGS_EXTRA "") + + # These variables are passed into the template + set(OPENCV_CUDA_ARCH_BIN "") + set(OPENCV_CUDA_ARCH_PTX "") # Tell nvcc to add binaries for the specified GPUs - string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_GPU_NO_POINTS}") + string(REGEX MATCHALL "[0-9()]+" ARCH_LIST "${ARCH_BIN_NO_POINTS}") foreach(ARCH IN LISTS ARCH_LIST) if (ARCH MATCHES "([0-9]+)\\(([0-9]+)\\)") set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) + set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${CMAKE_MATCH_1}") else() set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=sm_${ARCH}) + set(OPENCV_CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN} ${ARCH}") endif() endforeach() @@ -737,8 +745,10 @@ if(WITH_CUDA) string(REGEX MATCHALL "[0-9]+" ARCH_LIST "${ARCH_PTX_NO_POINTS}") foreach(ARCH IN LISTS ARCH_LIST) set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH},code=compute_${ARCH}) + set(OPENCV_CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX} ${ARCH}") endforeach() + # Will be processed in 
other scripts set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA}) set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}") diff --git a/cvconfig.h.cmake b/cvconfig.h.cmake index 5b35d247df29a3d7a45a6fd3235baa3109746f4d..e04cc47da0c71db19629df0970dd7e2d56ada26a 100644 --- a/cvconfig.h.cmake +++ b/cvconfig.h.cmake @@ -167,13 +167,13 @@ #cmakedefine HAVE_CUDA /* Compile for 'real' NVIDIA GPU architectures */ -#define OPENCV_ARCH_GPU "${ARCH_GPU_NO_POINTS}" +#define CUDA_ARCH_BIN "${OPENCV_CUDA_ARCH_BIN}" /* Compile for 'virtual' NVIDIA PTX architectures */ -#define OPENCV_ARCH_PTX "${ARCH_PTX_NO_POINTS}" +#define CUDA_ARCH_PTX "${OPENCV_CUDA_ARCH_PTX}" -/* Create PTX or CUBIN for 1.0 compute capability */ -#cmakedefine OPENCV_ARCH_GPU_OR_PTX_10 +/* Create PTX or BIN for 1.0 compute capability */ +#cmakedefine CUDA_ARCH_BIN_OR_PTX_10 /* VideoInput library */ #cmakedefine HAVE_VIDEOINPUT diff --git a/modules/gpu/src/initialization.cpp b/modules/gpu/src/initialization.cpp index 0104bd02a2e45c52781d76cb182dfde5a6541cc7..17bd150f0fd28e8bd3ee78a2837a28650cf2473d 100644 --- a/modules/gpu/src/initialization.cpp +++ b/modules/gpu/src/initialization.cpp @@ -164,33 +164,33 @@ namespace CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, std::equal_to()); + return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::equal_to()); } CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, + return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::less_equal()); } CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_PTX, major * 10 + minor, + return ::compare(CUDA_ARCH_PTX, major * 10 + minor, std::greater_equal()); } CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, std::equal_to()); + return ::compare(CUDA_ARCH_BIN, major * 10 + 
minor, std::equal_to()); } CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { - return ::compare(OPENCV_ARCH_GPU, major * 10 + minor, + return ::compare(CUDA_ARCH_BIN, major * 10 + minor, std::greater_equal()); } diff --git a/modules/gpu/src/precomp.hpp b/modules/gpu/src/precomp.hpp index 624e67fce1864f155b4937914358cb1acfc1a03b..d5ad3cc7271f716c5154c3108eefe4059b9eb66a 100644 --- a/modules/gpu/src/precomp.hpp +++ b/modules/gpu/src/precomp.hpp @@ -85,7 +85,7 @@ #error "Insufficient NPP version, please update it." #endif -#if defined(OPENCV_ARCH_GPU_OR_PTX_10) +#if defined(CUDA_ARCH_BIN_OR_PTX_10) #error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0" #endif