# Patch summary: build workarounds applied when the CUDA toolkit is older than 12.0 and the host C++ compiler version is 12.0 or newer.
#   - cmake/experiments/cuda_module_loading_lazy.cmake: print a message and return() before the lazy-module-loading setup.
#   - cmake/external/gloo.cmake: run patches/gloo/device.cc.patch (adds one #include) as the ExternalProject PATCH_COMMAND.
#   - cmake/external/protobuf.cmake: select a different PROTOBUF_TAG and set PROTOBUF_VERSION to 3.16.0.
#   - cmake/external/warpctc.cmake: run patches/warpctc/CMakeLists.txt.patch and pass -DCCBIN_COMPILER plus -DCUDA_TOOLKIT_ROOT_DIR to the sub-build.
#   - paddle/fluid/operators/jit/CMakeLists.txt: skip the jit_kernel_benchmark binary.
#   - paddle/phi/tools/CMakeLists.txt: return() before print_pten_kernels is declared.
# Every condition uses VERSION_LESS / VERSION_GREATER_EQUAL so that dotted version strings are compared component-wise
# and all files agree on the same "CUDA < 12.0 and host compiler >= 12.0" boundary.
diff --git a/cmake/experiments/cuda_module_loading_lazy.cmake b/cmake/experiments/cuda_module_loading_lazy.cmake index d1e07f57cb045e1164cf4bf8d8a72ad3454bc4af..e5ed89bfb5dc50365e71bc7c79c7e5e5dd857ee8 100644 --- a/cmake/experiments/cuda_module_loading_lazy.cmake +++ b/cmake/experiments/cuda_module_loading_lazy.cmake @@ -31,6 +31,11 @@ if(LINUX) message("cuda 11.7+ already support lazy module loading") return() endif() + if(${CUDA_VERSION} VERSION_LESS "12.0" AND ${CMAKE_CXX_COMPILER_VERSION} + VERSION_GREATER_EQUAL 12.0) + message("cuda less than 12.0 doesn't support gcc12") + return() + endif() message( "for cuda before 11.7, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a" diff --git a/cmake/external/gloo.cmake b/cmake/external/gloo.cmake index a0fc013a130a15dc385b5ab8ef05929fc4440040..8f871c5bd49f4bd8d11bde9c4624fa85e4a3a7bd 100755 --- a/cmake/external/gloo.cmake +++ b/cmake/external/gloo.cmake @@ -31,6 +31,17 @@ set(GLOO_LIBRARIES "${GLOO_INSTALL_DIR}/lib/libgloo.a" CACHE FILEPATH "gloo library." 
FORCE) +set(GLOO_PATCH_COMMAND "") +if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 12.0 AND ${CMAKE_CXX_COMPILER_VERSION} + VERSION_GREATER_EQUAL 12.0) + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/gloo/device.cc.patch + native_dst) + set(GLOO_PATCH_COMMAND patch -d ${GLOO_SOURCE_DIR}/gloo/transport/tcp < + ${native_dst}) + endif() +endif() + include_directories(${GLOO_INCLUDE_DIR}) if(WITH_ASCEND OR WITH_ASCEND_CL) @@ -59,6 +70,7 @@ else() GIT_TAG ${GLOO_TAG} PREFIX "${GLOO_PREFIX_DIR}" UPDATE_COMMAND "" + PATCH_COMMAND ${GLOO_PATCH_COMMAND} CONFIGURE_COMMAND "" BUILD_COMMAND mkdir -p ${GLOO_SOURCE_DIR}/build && cd ${GLOO_SOURCE_DIR}/build && cmake diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 5903edebae825f70c986ad6021d9584f0225d372..ff78c84b86571fab2754fd74a61ce5f697404efb 100755 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -250,6 +250,12 @@ function(build_protobuf TARGET_NAME BUILD_FOR_HOST) else() set(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) set(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546) + if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 12.0 + AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0) + set(PROTOBUF_TAG 2dc747c574b68a808ea4699d26942c8132fe2b09) + endif() + endif() endif() if(WITH_ARM_BRPC) set(ARM_PROTOBUF_URL @@ -322,6 +328,12 @@ elseif(WITH_ARM_BRPC) set(PROTOBUF_VERSION 3.7.1-baidu-ee-common) else() set(PROTOBUF_VERSION 3.1.0) + if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 12.0 + AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.0) + set(PROTOBUF_VERSION 3.16.0) + endif() + endif() endif() if(NOT PROTOBUF_FOUND) diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index c7a4e1d99bff16feff685c6da98ef72cdd9d89b7..d5ad47cf809630b417c577f23513507e0fa02c41 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -25,6 +25,19 @@ set(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) 
set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git) set(WARPCTC_TAG 37ece0e1bbe8a0019a63ac7e6462c36591c66a5b) +set(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc) +set(WARPCTC_PATCH_COMMAND "") +set(WARPCTC_CCBIN_OPTION "") +if(NOT WIN32 AND WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 12.0 AND ${CMAKE_CXX_COMPILER_VERSION} + VERSION_GREATER_EQUAL 12.0) + file(TO_NATIVE_PATH + ${PADDLE_SOURCE_DIR}/patches/warpctc/CMakeLists.txt.patch native_src) + set(WARPCTC_PATCH_COMMAND patch -d ${WARPCTC_SOURCE_DIR} < ${native_src}) + set(WARPCTC_CCBIN_OPTION -DCCBIN_COMPILER=${CCBIN_COMPILER}) + endif() +endif() + set(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) @@ -112,7 +125,7 @@ else() GIT_TAG ${WARPCTC_TAG} PREFIX ${WARPCTC_PREFIX_DIR} UPDATE_COMMAND "" - PATCH_COMMAND "" + PATCH_COMMAND ${WARPCTC_PATCH_COMMAND} #BUILD_ALWAYS 1 CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} @@ -132,7 +145,9 @@ else() -DBUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR} ${EXTERNAL_OPTIONAL_ARGS} + ${WARPCTC_CCBIN_OPTION} CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON diff --git a/paddle/fluid/operators/jit/CMakeLists.txt b/paddle/fluid/operators/jit/CMakeLists.txt index a6f10e5fbdab7cb7bbea0eaa88091cb5a33d0128..0a3c01cc091619faf3d5f0b4d5682596625b4a13 100644 --- a/paddle/fluid/operators/jit/CMakeLists.txt +++ b/paddle/fluid/operators/jit/CMakeLists.txt @@ -39,14 +39,23 @@ cc_test( SRCS test.cc DEPS jit_kernel_helper) if(NOT WIN32) - cc_binary( - jit_kernel_benchmark - SRCS - benchmark.cc - DEPS - jit_kernel_helper - device_tracer - tensor) + set(cuda_less12_and_gcc_greater12 false) + if(DEFINED CMAKE_CUDA_COMPILER_VERSION) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 12.0 + AND ${CMAKE_CXX_COMPILER_VERSION} 
VERSION_GREATER_EQUAL 12.0) + set(cuda_less12_and_gcc_greater12 true) + endif() + endif() + if(NOT cuda_less12_and_gcc_greater12) + cc_binary( + jit_kernel_benchmark + SRCS + benchmark.cc + DEPS + jit_kernel_helper + device_tracer + tensor) + endif() endif() if(WITH_TESTING AND TEST jit_kernel_test) set_tests_properties(jit_kernel_test PROPERTIES TIMEOUT 120) diff --git a/paddle/phi/tools/CMakeLists.txt b/paddle/phi/tools/CMakeLists.txt index d1df5ec32754608ad1039efe27e20bdd9876fc40..792adb12980a02010eaab2d5016f78cd51cee3e6 100644 --- a/paddle/phi/tools/CMakeLists.txt +++ b/paddle/phi/tools/CMakeLists.txt @@ -1,3 +1,9 @@ +if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_LESS 12.0 AND ${CMAKE_CXX_COMPILER_VERSION} + VERSION_GREATER_EQUAL 12.0) + return() + endif() +endif() add_executable(print_pten_kernels print_pten_kernels.cc) target_link_libraries(print_pten_kernels phi phi_api_utils) if(WIN32) diff --git a/patches/gloo/device.cc.patch b/patches/gloo/device.cc.patch new file mode 100644 index 0000000000000000000000000000000000000000..10769b3e0ca3311cd72db6cd791f0dedfbcd395b --- /dev/null +++ b/patches/gloo/device.cc.patch @@ -0,0 +1,10 @@ +--- a/device.cc ++++ b/device.cc +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + #include "gloo/common/linux.h" + #include "gloo/common/logging.h" diff --git a/patches/warpctc/CMakeLists.txt.patch b/patches/warpctc/CMakeLists.txt.patch new file mode 100644 index 0000000000000000000000000000000000000000..9605d8b2d317b188e10d996e38bea61c856c8403 --- /dev/null +++ b/patches/warpctc/CMakeLists.txt.patch @@ -0,0 +1,13 @@ +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -23,6 +23,10 @@ + option(BUILD_SHARED "build warp-ctc shared library." 
ON) + option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF) + ++set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler") ++set(NVCC_PREPEND_FLAGS "${NVCC_PREPEND_FLAGS} -ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler") ++set(CCBIN "-ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler") ++ + if(WITH_ROCM) + add_definitions(-DWARPCTC_WITH_HIP) + include(hip)