# Switch the CUDA build from CMake's first-class CUDA language
# (enable_language(CUDA), CMAKE_CUDA_FLAGS, CMAKE_CUDA_COMPILER_VERSION,
# add_library/add_executable) to the FindCUDA module (find_package(CUDA),
# CUDA_NVCC_FLAGS, CUDA_VERSION*, cuda_add_library/cuda_add_executable).
#
# Per file:
#   CMakeLists.txt         drop include(init) and enable_language(CUDA);
#                          find_package(CUDA QUIET) and option(WITH_GPU ...) move
#                          next to the other find_package()/option() calls.
#   cmake/configure.cmake  version checks read CUDA_VERSION_MAJOR; the SIMD flag is
#                          appended to CUDA_NVCC_FLAGS.
#   cmake/cuda.cmake       arch flags are built as a list; per-version defines and
#                          the C++11/-fPIC/-w flags go to CUDA_NVCC_FLAGS; on
#                          non-Windows the per-build-type CXX flags are forwarded to
#                          nvcc; PADDLE_CUDA_BINVER becomes a compile definition.
#   cmake/flags.cmake      safe_set_nvflag() appends "-Xcompiler <flag>" directly to
#                          CUDA_NVCC_FLAGS; SAFE_GPU_COMMON_FLAGS is removed.
#   cmake/generic.cmake    nv_library/nv_binary/nv_test call cuda_add_library /
#                          cuda_add_executable.
#   cmake/init.cmake       deleted.
#
# Review amendments relative to the patch as originally submitted:
#   * cmake/cuda.cmake: the added Windows fallback uses message(FATAL_ERROR ...).
#     "FATAL" is not a message() mode, so the old spelling only printed the text
#     and let configuration continue.
#   * Line pairs that differed only in whitespace are left as context, which
#     removes two whitespace-only hunks from cmake/flags.cmake.
#   * Indentation of context lines was restored by hand; apply with
#     `git apply --ignore-whitespace` if the pre-image spacing differs.
#   * The "index" blob hashes are carried over unchanged and no longer describe
#     the amended post-image.
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a22d8dee50000e8d5fc4ba85367cc455fbea792a..1dd586b5fc6a4ab301e4f0344028baad1fcaee60 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,19 +20,6 @@ set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
 include(system)
 
 project(paddle CXX C)
-
-include(init)
-
-# enable language CUDA
-# TODO(Shibo Tao): remove find_package(CUDA) completely.
-find_package(CUDA QUIET)
-option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
-if(WITH_GPU AND NOT APPLE)
-  enable_language(CUDA)
-  message(STATUS "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: "
-          "${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}")
-endif()
-
 message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
         "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
 message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
@@ -65,12 +52,14 @@ else(WIN32)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations")
 endif(WIN32)
 
+find_package(CUDA QUIET)
 find_package(Git REQUIRED)
 find_package(Threads REQUIRED)
 
 include(simd)
 
 ################################ Exposed Configurations #######################################
+option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
 option(WITH_DSO "Compile PaddlePaddle with dynamic linked CUDA" ON)
 option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
 option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
diff --git a/cmake/configure.cmake b/cmake/configure.cmake
index 57362c4b181df3f63c7c7a7042010fd342a980f7..b0ce1a4ea2d851eb08f341d8dd145e918f08c630 100644
--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -76,7 +76,7 @@ if(WITH_GPU)
 
     FIND_PACKAGE(CUDA REQUIRED)
 
-    if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 7)
+    if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
         message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
     endif()
 
@@ -89,7 +89,7 @@ if(WITH_GPU)
     else()
         message(STATUS "Cannot find CUPTI, GPU Profiling is incorrect.")
     endif()
-    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=\"${SIMD_FLAG}\"")
+    set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
 
     # Include cuda and cudnn
     include_directories(${CUDNN_INCLUDE_DIR})
@@ -97,11 +97,11 @@ if(WITH_GPU)
 
     if(TENSORRT_FOUND)
         if(WIN32)
-            if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 9)
+            if(${CUDA_VERSION_MAJOR} VERSION_LESS 9)
                 message(FATAL_ERROR "TensorRT needs CUDA >= 9.0 to compile on Windows")
             endif()
         else()
-            if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 8)
+            if(${CUDA_VERSION_MAJOR} VERSION_LESS 8)
                 message(FATAL_ERROR "TensorRT needs CUDA >= 8.0 to compile")
             endif()
             if(${CUDNN_MAJOR_VERSION} VERSION_LESS 7)
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 29e16b3757cd8c812f6bf4b6cb2574f32b6d04fd..7d1995360b47416fdd59c759147dd239d1157a04 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -104,12 +104,12 @@ function(select_nvcc_arch_flags out_variable)
   elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal")
     set(cuda_arch_bin "60 61")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Volta")
-    if (NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0)
+    if (NOT ${CUDA_VERSION} LESS 10.0)
       add_definitions("-DSUPPORTS_CUDA_FP16")
     endif()
     set(cuda_arch_bin "70")
   elseif(${CUDA_ARCH_NAME} STREQUAL "Turing")
-    if (NOT ${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0)
+    if (NOT ${CUDA_VERSION} LESS 10.0)
       add_definitions("-DSUPPORTS_CUDA_FP16")
     endif()
     set(cuda_arch_bin "75")
@@ -142,19 +142,19 @@ function(select_nvcc_arch_flags out_variable)
   foreach(arch ${cuda_arch_bin})
     if(arch MATCHES "([0-9]+)\\(([0-9]+)\\)")
       # User explicitly specified PTX for the concrete BIN
-      string(APPEND nvcc_flags " -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}")
-      string(APPEND nvcc_archs_readable " sm_${CMAKE_MATCH_1}")
+      list(APPEND nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1})
+      list(APPEND nvcc_archs_readable sm_${CMAKE_MATCH_1})
     else()
       # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN
-      string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=sm_${arch}")
-      string(APPEND nvcc_archs_readable " sm_${arch}")
+      list(APPEND nvcc_flags -gencode arch=compute_${arch},code=sm_${arch})
+      list(APPEND nvcc_archs_readable sm_${arch})
     endif()
   endforeach()
 
   # Tell NVCC to add PTX intermediate code for the specified architectures
   foreach(arch ${cuda_arch_ptx})
-    string(APPEND nvcc_flags " -gencode arch=compute_${arch},code=compute_${arch}")
-    string(APPEND nvcc_archs_readable " compute_${arch}")
+    list(APPEND nvcc_flags -gencode arch=compute_${arch},code=compute_${arch})
+    list(APPEND nvcc_archs_readable compute_${arch})
   endforeach()
 
   string(REPLACE ";" " " nvcc_archs_readable "${nvcc_archs_readable}")
@@ -162,32 +162,32 @@ function(select_nvcc_arch_flags out_variable)
   set(${out_variable}_readable ${nvcc_archs_readable} PARENT_SCOPE)
 endfunction()
 
-message(STATUS "CUDA detected: " ${CMAKE_CUDA_COMPILER_VERSION})
-if (${CMAKE_CUDA_COMPILER_VERSION} LESS 7.0)
+message(STATUS "CUDA detected: " ${CUDA_VERSION})
+if (${CUDA_VERSION} LESS 7.0)
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs})
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 8.0) # CUDA 7.x
+elseif (${CUDA_VERSION} LESS 8.0) # CUDA 7.x
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs7})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 9.0) # CUDA 8.x
+  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
+  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
+elseif (${CUDA_VERSION} LESS 9.0) # CUDA 8.x
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs8})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
+  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
+  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
   # CUDA 8 may complain that sm_20 is no longer supported. Suppress the
   # warning for now.
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Wno-deprecated-gpu-targets")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 10.0) # CUDA 9.x
+  list(APPEND CUDA_NVCC_FLAGS "-Wno-deprecated-gpu-targets")
+elseif (${CUDA_VERSION} LESS 10.0) # CUDA 9.x
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs9})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
-elseif (${CMAKE_CUDA_COMPILER_VERSION} LESS 11.0) # CUDA 10.x
+  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
+  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
+elseif (${CUDA_VERSION} LESS 11.0) # CUDA 10.x
   set(paddle_known_gpu_archs ${paddle_known_gpu_archs10})
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D_MWAITXINTRIN_H_INCLUDED")
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -D__STRICT_ANSI__")
+  list(APPEND CUDA_NVCC_FLAGS "-D_MWAITXINTRIN_H_INCLUDED")
+  list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__")
 endif()
+add_definitions("-DPADDLE_CUDA_BINVER=\"${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}\"")
 
-message(STATUS "PADDLE_CUDA_BINVER=${CUDA_VERSION_MAJOR}${CUDA_VERSION_MINOR}")
-
+include_directories(${CUDA_INCLUDE_DIRS})
 if(NOT WITH_DSO)
   if(WIN32)
     set_property(GLOBAL PROPERTY CUDA_MODULES ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${CUDA_cusolver_LIBRARY})
@@ -196,24 +196,37 @@ endif(NOT WITH_DSO)
 
 # setting nvcc arch flags
 select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${NVCC_FLAGS_EXTRA}")
-message(STATUS "NVCC_FLAGS_EXTRA: ${NVCC_FLAGS_EXTRA}")
+list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_EXTRA})
+message(STATUS "Added CUDA NVCC flags for: ${NVCC_FLAGS_EXTRA_readable}")
 
 # Set C++11 support
 set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+
 # Release/Debug flags set by cmake. Such as -O3 -g -DNDEBUG etc.
 # So, don't set these flags here.
 if (NOT WIN32) # windows msvc2015 support c++11 natively.
   # -std=c++11 -fPIC not recoginize by msvc, -Xcompiler will be added by cmake.
-  set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -std=c++11")
+  list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
+  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
 endif(NOT WIN32)
 
 # in cuda9, suppress cuda warning on eigen
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -w")
+list(APPEND CUDA_NVCC_FLAGS "-w")
 # Set :expt-relaxed-constexpr to suppress Eigen warnings
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
+list(APPEND CUDA_NVCC_FLAGS "--expt-relaxed-constexpr")
 
-if (WIN32)
+if (NOT WIN32)
+  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_DEBUG})
+  elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
+    list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
+  elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
+    list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELWITHDEBINFO})
+  elseif(CMAKE_BUILD_TYPE STREQUAL "MinSizeRel")
+    # nvcc 9 does not support -Os. Use Release flags instead
+    list(APPEND CUDA_NVCC_FLAGS ${CMAKE_CXX_FLAGS_RELEASE})
+  endif()
+else(NOT WIN32)
   list(APPEND CUDA_NVCC_FLAGS "-Xcompiler \"/wd 4244 /wd 4267 /wd 4819\"")
   list(APPEND CUDA_NVCC_FLAGS "--compiler-options;/bigobj")
   if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -223,9 +236,9 @@ if (WIN32)
   elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
     list(APPEND CUDA_NVCC_FLAGS "-O3 -DNDEBUG")
   else()
-    message(FATAL "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.")
+    message(FATAL_ERROR "Windows only support Release or Debug build now. Please set visual studio build type to Release/Debug, x64 build.")
   endif()
-endif(WIN32)
+endif(NOT WIN32)
 
 mark_as_advanced(CUDA_BUILD_CUBIN CUDA_BUILD_EMULATION CUDA_VERBOSE_BUILD)
 mark_as_advanced(CUDA_SDK_ROOT_DIR CUDA_SEPARABLE_COMPILATION)
diff --git a/cmake/flags.cmake b/cmake/flags.cmake
index 1530536427b937ddaaef9378463f94337e0c14ae..884e5d45a6ae3cc817c33acda8e3fb67ecb962c5 100644
--- a/cmake/flags.cmake
+++ b/cmake/flags.cmake
@@ -76,7 +76,7 @@ macro(safe_set_nvflag flag_name)
     CHECK_C_COMPILER_FLAG(${flag_name} C_COMPILER_SUPPORT_FLAG_${safe_name})
     set(safe_name C_COMPILER_SUPPORT_FLAG_${safe_name})
     if(${safe_name})
-        set(SAFE_GPU_COMMON_FLAGS "${SAFE_GPU_COMMON_FLAGS} -Xcompiler=\"${flag_name}\"")
+        LIST(APPEND CUDA_NVCC_FLAGS -Xcompiler ${flag_name})
     endif()
 endmacro()
 
@@ -212,14 +212,10 @@ foreach(flag ${COMMON_FLAGS})
     safe_set_cxxflag(CMAKE_CXX_FLAGS ${flag})
 endforeach()
 
-set(SAFE_GPU_COMMON_FLAGS "")
 foreach(flag ${GPU_COMMON_FLAGS})
     safe_set_nvflag(${flag})
 endforeach()
 
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${SAFE_GPU_COMMON_FLAGS}")
-
-
 if(WIN32 AND MSVC_STATIC_CRT)
     # windows build turn off warnings.
     safe_set_static_flag()
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index fb35dfcfb254497e9677306ebf5cefa788807b1c..54a6e18b0078b3719e38a2777971f513b7fbab68 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -412,14 +412,10 @@ function(nv_library TARGET_NAME)
     set(multiValueArgs SRCS DEPS)
     cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
     if(nv_library_SRCS)
-      # Attention:
-      # 1. cuda_add_library is deprecated after cmake v3.10, use add_library for CUDA please.
-      # 2. cuda_add_library does not support ccache.
-      # Reference: https://cmake.org/cmake/help/v3.10/module/FindCUDA.html
       if (nv_library_SHARED OR nv_library_shared) # build *.so
-        add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
+        cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
       else()
-        add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
+        cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
         find_fluid_modules(${TARGET_NAME})
       endif()
       if (nv_library_DEPS)
@@ -454,7 +450,7 @@ function(nv_binary TARGET_NAME)
     set(oneValueArgs "")
     set(multiValueArgs SRCS DEPS)
     cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-    add_executable(${TARGET_NAME} ${nv_binary_SRCS})
+    cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
     if(nv_binary_DEPS)
       target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
       add_dependencies(${TARGET_NAME} ${nv_binary_DEPS})
@@ -468,11 +464,7 @@ function(nv_test TARGET_NAME)
     set(oneValueArgs "")
     set(multiValueArgs SRCS DEPS)
     cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-    # Attention:
-    # 1. cuda_add_executable is deprecated after cmake v3.10, use cuda_add_executable for CUDA please.
-    # 2. cuda_add_executable does not support ccache.
-    # Reference: https://cmake.org/cmake/help/v3.10/module/FindCUDA.html
-    add_executable(${TARGET_NAME} ${nv_test_SRCS})
+    cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
     get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
     target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules})
     add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
diff --git a/cmake/init.cmake b/cmake/init.cmake
deleted file mode 100644
index a33bfdbd412b15ef8d35c18b38ba6e18a1d03b11..0000000000000000000000000000000000000000
--- a/cmake/init.cmake
+++ /dev/null
@@ -1,29 +0,0 @@
-# Attention: cmake will append these flags to compile command automatically.
-# So if you want to add global option, change this file rather than flags.cmake
-
-# default: "-g"
-set(CMAKE_C_FLAGS_DEBUG "-g")
-# default: "-O3 -DNDEBUG"
-set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
-# default: "-O2 -g -DNDEBUG"
-set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
-# default: "-Os -DNDEBUG"
-set(CMAKE_C_FLAGS_MINSIZEREL "-Os -DNDEBUG")
-
-# default: "-g"
-set(CMAKE_CXX_FLAGS_DEBUG "-g")
-# default: "-O3 -DNDEBUG"
-set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG")
-# default: "-O2 -g -DNDEBUG"
-set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
-# default: "-Os -DNDEBUG"
-set(CMAKE_CXX_FLAGS_MINSIZEREL "-Os -DNDEBUG")
-
-# default: "-g"
-set(CMAKE_CUDA_FLAGS_DEBUG "-g")
-# default: "-O3 -DNDEBUG"
-set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG")
-# default: "-O2 -g -DNDEBUG"
-set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g -DNDEBUG")
-# default: "-O1 -DNDEBUG"
-set(CMAKE_CUDA_FLAGS_MINSIZEREL "-O1 -DNDEBUG")