diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 6f4671c13a9e3dccb9be0a06f4bc2453af94bd55..7a94bda0f5f73e48081f68d7b2730e3df1e46232 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -16,7 +16,7 @@ else()
   set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
   set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
   set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
-  set(paddle_known_gpu_archs11 "35 50 52 60 61 70 75 80")
+  set(paddle_known_gpu_archs11 "52 60 61 70 75 80")
 endif()
 
 ######################################################################################
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index 0f6b1c182d5590354c8a970eea339a3e23846f39..ac6cf624e82c0a346fea42fa29fe9bab6ace8d47 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -18,7 +18,7 @@ SET(WARPCTC_PREFIX_DIR ${THIRD_PARTY_PATH}/warpctc)
 SET(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
 SET(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
 set(WARPCTC_REPOSITORY https://github.com/baidu-research/warp-ctc.git)
-set(WARPCTC_TAG bc29dcfff07ced1c7a19a4ecee48e5ad583cef8e)
+set(WARPCTC_TAG fc7f226b93758216a03b1be9d24593a12819b984)
 
 SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
     CACHE PATH "Warp-ctc Directory" FORCE)
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index 9d07a0979d9392c9b2ab78562f8e0ceb8fc5d722..415e07c75425345f5f1ad29a8544e02a5bfb12e4 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -28,7 +28,12 @@ function(CheckCompilerCXX11Flag)
 endfunction()
 
 CheckCompilerCXX11Flag()
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+if(WITH_GPU AND "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL 11.0)
+    # GPU builds with a CUDA compiler >= 11.0 use C++14; every other build stays on C++11.
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14")
+else()
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+endif()
 # safe_set_flag
 #
 # Set a compile flag only if compiler is support
diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index c9442e8f843ac152cac02908799a8d24f5951e58..9edfcb967abc26a25a94d368298c1c475295019f 100644
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -243,9 +243,11 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC))
 ENDIF()
 
 if(WITH_GPU)
-    include(external/cub) # download cub
-    list(APPEND third_party_deps extern_cub)
-
+    # The external cub download is only needed for CUDA compilers older than 11.0.
+    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 11.0)
+        include(external/cub) # download cub
+        list(APPEND third_party_deps extern_cub)
+    endif()
     set(CUDAERROR_URL "http://paddlepaddledeps.bj.bcebos.com/cudaErrorMessage.tar.gz" CACHE STRING "" FORCE)
     file_download_and_uncompress(${CUDAERROR_URL} "cudaerror") # download file cudaErrorMessage
 endif(WITH_GPU)
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 6e8ff52ed4a8846f5f6060e10cfd9bec22308e9e..f0a04d850dff01e0776e96bbe518cde2ce8bb88b 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -45,7 +45,9 @@ endif()
 
 SET(OP_HEADER_DEPS xxhash executor)
 if (WITH_GPU)
-    SET(OP_HEADER_DEPS ${OP_HEADER_DEPS} cub)
+    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 11.0)
+        list(APPEND OP_HEADER_DEPS cub)
+    endif()
 endif()
 
 SET(OP_PREFETCH_DEPS "")
diff --git a/paddle/fluid/operators/detection/CMakeLists.txt b/paddle/fluid/operators/detection/CMakeLists.txt
index 537063640e4ef6e49f7b991482f0f3122ecef02f..c2b7c27ab4adb5282ad7aa5f7a16c15f81ba5f5e 100644
--- a/paddle/fluid/operators/detection/CMakeLists.txt
+++ b/paddle/fluid/operators/detection/CMakeLists.txt
@@ -41,9 +41,14 @@ detection_library(sigmoid_focal_loss_op SRCS sigmoid_focal_loss_op.cc sigmoid_fo
 detection_library(retinanet_detection_output_op SRCS retinanet_detection_output_op.cc)
 
 if(WITH_GPU)
-  detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS memory cub)
-  detection_library(distribute_fpn_proposals_op SRCS distribute_fpn_proposals_op.cc distribute_fpn_proposals_op.cu DEPS memory cub)
-  detection_library(collect_fpn_proposals_op SRCS collect_fpn_proposals_op.cc collect_fpn_proposals_op.cu DEPS memory cub)
+  # cub is listed as an explicit dependency only for CUDA compilers older than 11.0.
+  set(TMPDEPS memory)
+  if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 11.0)
+    list(APPEND TMPDEPS cub)
+  endif()
+  detection_library(generate_proposals_op SRCS generate_proposals_op.cc generate_proposals_op.cu DEPS ${TMPDEPS})
+  detection_library(distribute_fpn_proposals_op SRCS distribute_fpn_proposals_op.cc distribute_fpn_proposals_op.cu DEPS ${TMPDEPS})
+  detection_library(collect_fpn_proposals_op SRCS collect_fpn_proposals_op.cc collect_fpn_proposals_op.cu DEPS ${TMPDEPS})
 else()
   detection_library(generate_proposals_op SRCS generate_proposals_op.cc)
   detection_library(distribute_fpn_proposals_op SRCS distribute_fpn_proposals_op.cc)
diff --git a/paddle/fluid/operators/math/CMakeLists.txt b/paddle/fluid/operators/math/CMakeLists.txt
index 3a19c7edff3569d503480fd060a6432dc59d2108..10d335b828b516fe08871f314ba4667c06f04714 100644
--- a/paddle/fluid/operators/math/CMakeLists.txt
+++ b/paddle/fluid/operators/math/CMakeLists.txt
@@ -9,7 +9,9 @@ function(math_library TARGET)
     set(hip_srcs)
     set(math_common_deps device_context framework_proto enforce)
     if (WITH_GPU)
-        list(APPEND math_common_deps cub)
+        if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 11.0)
+            list(APPEND math_common_deps cub)
+        endif()
     endif()
     set(multiValueArgs DEPS)
     cmake_parse_arguments(math_library "${options}" "${oneValueArgs}"
diff --git a/paddle/fluid/operators/reduce_ops/CMakeLists.txt b/paddle/fluid/operators/reduce_ops/CMakeLists.txt
index 3da481a142aa2282aade661de7679cf4edf597a0..a68666b100cb52c722c4fefc849e94947130010f 100644
--- a/paddle/fluid/operators/reduce_ops/CMakeLists.txt
+++ b/paddle/fluid/operators/reduce_ops/CMakeLists.txt
@@ -1,6 +1,10 @@
 include(operators)
 if(WITH_GPU)
-    register_operators(DEPS cub)
+    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 11.0)
+        register_operators(DEPS cub)
+    else()
+        register_operators()
+    endif()
 else()
     register_operators()
 endif()
@@ -24,5 +28,9 @@ if(WITH_GPU)
 endif()
 
 if(WITH_GPU)
-    nv_test(check_reduce_rank_test SRCS check_reduce_rank_test.cu DEPS tensor cub)
+    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 11.0)
+        nv_test(check_reduce_rank_test SRCS check_reduce_rank_test.cu DEPS tensor cub)
+    else()
+        nv_test(check_reduce_rank_test SRCS check_reduce_rank_test.cu DEPS tensor)
+    endif()
 endif()