diff --git a/cmake/experiments/cuda_module_loading_lazy.cmake b/cmake/experiments/cuda_module_loading_lazy.cmake
index d1e07f57cb045e1164cf4bf8d8a72ad3454bc4af..a13af61af6512f425105d06465e9d093b624e270 100644
--- a/cmake/experiments/cuda_module_loading_lazy.cmake
+++ b/cmake/experiments/cuda_module_loading_lazy.cmake
@@ -31,6 +31,12 @@ if(LINUX)
     message("cuda 11.7+ already support lazy module loading")
     return()
   endif()
+  # Stop here for CUDA < 11.2 with a host compiler >= 12.0 (see message).
+  if("${CUDA_VERSION}" VERSION_LESS "11.2"
+     AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.0)
+    message("cuda less than 11.2 doesn't support gcc12")
+    return()
+  endif()
 
   message(
     "for cuda before 11.7, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a"
diff --git a/cmake/external/gloo.cmake b/cmake/external/gloo.cmake
index a4d592c10a897d360719898dc8e54cf10c852bb9..a13772d5de319df0aaa67ff7a0ef265e2317fed0 100755
--- a/cmake/external/gloo.cmake
+++ b/cmake/external/gloo.cmake
@@ -31,6 +31,19 @@ set(GLOO_LIBRARIES
     "${GLOO_INSTALL_DIR}/lib/libgloo.a"
     CACHE FILEPATH "gloo library." FORCE)
 
+# Default to no patch step; apply patches/gloo/device.cc.patch only for
+# CUDA < 12 combined with a host compiler >= 12.0.
+set(GLOO_PATCH_COMMAND "")
+if(WITH_GPU)
+  if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 12.0
+     AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.0)
+    file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/gloo/device.cc.patch
+         native_dst)
+    set(GLOO_PATCH_COMMAND patch -d ${GLOO_SOURCE_DIR}/gloo/transport/tcp <
+                           ${native_dst})
+  endif()
+endif()
+
 include_directories(${GLOO_INCLUDE_DIR})
 
 if(WITH_ASCEND OR WITH_ASCEND_CL)
@@ -59,6 +72,7 @@ else()
     GIT_TAG ${GLOO_TAG}
     PREFIX "${GLOO_PREFIX_DIR}"
     UPDATE_COMMAND ""
+    PATCH_COMMAND ${GLOO_PATCH_COMMAND}
     CONFIGURE_COMMAND ""
     BUILD_COMMAND
       mkdir -p ${GLOO_SOURCE_DIR}/build && cd ${GLOO_SOURCE_DIR}/build && cmake
diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake
index e3c774d29d4f5348a6dcb6e0b802bced00007617..e774aff8b0298fe95004ed066ab4cc1796ee3c61 100755
--- a/cmake/external/protobuf.cmake
+++ b/cmake/external/protobuf.cmake
@@ -250,6 +250,13 @@ function(build_protobuf TARGET_NAME BUILD_FOR_HOST)
   else()
     set(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git)
     set(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546)
+    # CUDA < 12 with a host compiler >= 12.0 builds another protobuf commit.
+    if(WITH_GPU)
+      if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 12.0
+         AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.0)
+        set(PROTOBUF_TAG 2dc747c574b68a808ea4699d26942c8132fe2b09)
+      endif()
+    endif()
   endif()
   if(WITH_ARM_BRPC)
     set(ARM_PROTOBUF_URL
@@ -322,6 +329,13 @@ elseif(WITH_ARM_BRPC)
   set(PROTOBUF_VERSION 3.7.1-baidu-ee-common)
 else()
   set(PROTOBUF_VERSION 3.1.0)
+  # CUDA < 12 with a host compiler >= 12.0 expects protobuf 3.16.0 instead.
+  if(WITH_GPU)
+    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 12.0
+       AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.0)
+      set(PROTOBUF_VERSION 3.16.0)
+    endif()
+  endif()
 endif()
 
 if(NOT PROTOBUF_FOUND)
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index aa8ab62d7ae28197e9f18b83440f75c8c68d8fff..7f8da7fbe506b5711a2855a9525a38eb8a94b767 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -25,6 +25,21 @@ set(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc)
 set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git)
 set(WARPCTC_TAG bdc2b4550453e0ef2d3b5190f9c6103a84eff184)
 
+set(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc)
+# Patch step and -DCCBIN_COMPILER stay empty unless CUDA < 12 is combined with
+# a host compiler >= 12.0 on a non-Windows GPU build.
+set(WARPCTC_PATCH_COMMAND "")
+set(WARPCTC_CCBIN_OPTION "")
+if(NOT WIN32 AND WITH_GPU)
+  if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 12.0
+     AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.0)
+    file(TO_NATIVE_PATH
+         ${PADDLE_SOURCE_DIR}/patches/warpctc/CMakeLists.txt.patch native_src)
+    set(WARPCTC_PATCH_COMMAND patch -d ${WARPCTC_SOURCE_DIR} < ${native_src})
+    set(WARPCTC_CCBIN_OPTION -DCCBIN_COMPILER=${CCBIN_COMPILER})
+  endif()
+endif()
+
 set(WARPCTC_INCLUDE_DIR
     "${WARPCTC_INSTALL_DIR}/include"
     CACHE PATH "Warp-ctc Directory" FORCE)
@@ -112,7 +127,7 @@ else()
     GIT_TAG ${WARPCTC_TAG}
     PREFIX ${WARPCTC_PREFIX_DIR}
     UPDATE_COMMAND ""
-    PATCH_COMMAND ""
+    PATCH_COMMAND ${WARPCTC_PATCH_COMMAND}
     #BUILD_ALWAYS 1
     CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
                -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
@@ -132,7 +147,9 @@ else()
                -DBUILD_TESTS=OFF
                -DCMAKE_POSITION_INDEPENDENT_CODE=ON
                -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
+               -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
                ${EXTERNAL_OPTIONAL_ARGS}
+               ${WARPCTC_CCBIN_OPTION}
     CMAKE_CACHE_ARGS
       -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
       -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
diff --git a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h
index 4d53ff8a7391a1888c8c8e92f5203b1b1e17c845..655e3c2a5cf0d046e5e2a7189a537f81a92fca7e 100644
--- a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h
+++ b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h
@@ -62,7 +62,7 @@ inline std::vector shuffle_int_vector(int n) {
     e = i++;
   }
   std::shuffle(ret.begin(), ret.end(), random_engine_wrapper.engine);
-  return std::move(ret);
+  return ret;
 }
 
 #define CUDA_CHECK(cmd) \
diff --git a/paddle/fluid/operators/jit/CMakeLists.txt b/paddle/fluid/operators/jit/CMakeLists.txt
index 8aa7eea6708a62121ea1acc2f6377bc62a804b85..68a5e7d22ddc6e67deb8285db6e9e2ef67cbb0b4 100644
--- a/paddle/fluid/operators/jit/CMakeLists.txt
+++ b/paddle/fluid/operators/jit/CMakeLists.txt
@@ -39,14 +39,25 @@ cc_test(
   SRCS test.cc
   DEPS jit_kernel_helper)
 if(NOT WIN32)
-  cc_binary(
-    jit_kernel_benchmark
-    SRCS
-    benchmark.cc
-    DEPS
-    jit_kernel_helper
-    device_tracer
-    tensor)
+  # Skip jit_kernel_benchmark for CUDA < 12 with a host compiler >= 12.0.
+  # When CMAKE_CUDA_COMPILER_VERSION is undefined the benchmark is still built.
+  set(cuda_less12_and_gcc_greater12 false)
+  if(DEFINED CMAKE_CUDA_COMPILER_VERSION)
+    if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 12.0
+       AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.0)
+      set(cuda_less12_and_gcc_greater12 true)
+    endif()
+  endif()
+  if(NOT cuda_less12_and_gcc_greater12)
+    cc_binary(
+      jit_kernel_benchmark
+      SRCS
+      benchmark.cc
+      DEPS
+      jit_kernel_helper
+      device_tracer
+      tensor)
+  endif()
 endif()
 if(WITH_TESTING AND TEST jit_kernel_test)
   set_tests_properties(jit_kernel_test PROPERTIES TIMEOUT 120)
diff --git a/paddle/phi/tools/CMakeLists.txt b/paddle/phi/tools/CMakeLists.txt
index 9c2492cd4ef34f0f676075ce61effdd372fdd5e5..bf66522862cd63c93d71c8ec1e79d91f93662b47 100644
--- a/paddle/phi/tools/CMakeLists.txt
+++ b/paddle/phi/tools/CMakeLists.txt
@@ -1,3 +1,11 @@
+# Skip the targets below for CUDA < 12 with a host compiler >= 12.0.
+if(WITH_GPU)
+  if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS 12.0
+     AND "${CMAKE_CXX_COMPILER_VERSION}" VERSION_GREATER_EQUAL 12.0)
+    return()
+  endif()
+endif()
+
 add_executable(print_phi_kernels print_phi_kernels.cc)
 target_link_libraries(print_phi_kernels phi phi_api_utils)
 if(WIN32)
diff --git a/patches/gloo/device.cc.patch b/patches/gloo/device.cc.patch
new file mode 100644
index 0000000000000000000000000000000000000000..10769b3e0ca3311cd72db6cd791f0dedfbcd395b
--- /dev/null
+++ b/patches/gloo/device.cc.patch
@@ -0,0 +1,10 @@
+--- a/device.cc
++++ b/device.cc
+@@ -12,6 +12,7 @@
+ #include
+ #include
+ #include
++#include
+ 
+ #include "gloo/common/linux.h"
+ #include "gloo/common/logging.h"
diff --git a/patches/warpctc/CMakeLists.txt.patch b/patches/warpctc/CMakeLists.txt.patch
new file mode 100644
index 0000000000000000000000000000000000000000..9605d8b2d317b188e10d996e38bea61c856c8403
--- /dev/null
+++ b/patches/warpctc/CMakeLists.txt.patch
@@ -0,0 +1,13 @@
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -23,6 +23,10 @@
+ option(BUILD_SHARED "build warp-ctc shared library." ON)
+ option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
+ 
++set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler")
++set(NVCC_PREPEND_FLAGS "${NVCC_PREPEND_FLAGS} -ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler")
++set(CCBIN "-ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler")
++
+ if(WITH_ROCM)
+     add_definitions(-DWARPCTC_WITH_HIP)
+     include(hip)