From 755049f2bb5034100edfec95aa4d6899a121ec25 Mon Sep 17 00:00:00 2001 From: chalsliu <45041955+chalsliu@users.noreply.github.com> Date: Tue, 7 Feb 2023 14:50:33 +0800 Subject: [PATCH] Support build with gcc12 for CUDA less than 12.0 (#50106) --- .../cuda_module_loading_lazy.cmake | 5 ++++ cmake/external/gloo.cmake | 12 +++++++++ cmake/external/protobuf.cmake | 12 +++++++++ cmake/external/warpctc.cmake | 17 ++++++++++++- .../fleet/heter_ps/gpu_graph_utils.h | 2 +- paddle/fluid/operators/jit/CMakeLists.txt | 25 +++++++++++++------ paddle/phi/tools/CMakeLists.txt | 7 ++++++ patches/gloo/device.cc.patch | 10 ++++++++ patches/warpctc/CMakeLists.txt.patch | 13 ++++++++++ 9 files changed, 93 insertions(+), 10 deletions(-) create mode 100644 patches/gloo/device.cc.patch create mode 100644 patches/warpctc/CMakeLists.txt.patch diff --git a/cmake/experiments/cuda_module_loading_lazy.cmake b/cmake/experiments/cuda_module_loading_lazy.cmake index d1e07f57cb0..a13af61af65 100644 --- a/cmake/experiments/cuda_module_loading_lazy.cmake +++ b/cmake/experiments/cuda_module_loading_lazy.cmake @@ -31,6 +31,11 @@ if(LINUX) message("cuda 11.7+ already support lazy module loading") return() endif() + if(${CUDA_VERSION} VERSION_LESS "11.2" AND ${CMAKE_CXX_COMPILER_VERSION} + VERSION_GREATER_EQUAL 12.0) + message("cuda less than 11.2 doesn't support gcc12") + return() + endif() message( "for cuda before 11.7, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a" diff --git a/cmake/external/gloo.cmake b/cmake/external/gloo.cmake index a4d592c10a8..a13772d5de3 100755 --- a/cmake/external/gloo.cmake +++ b/cmake/external/gloo.cmake @@ -31,6 +31,17 @@ set(GLOO_LIBRARIES "${GLOO_INSTALL_DIR}/lib/libgloo.a" CACHE FILEPATH "gloo library." FORCE) +set(GLOO_PATCH_COMMAND "") +if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0 AND ${CMAKE_CXX_COMPILER_VERSION} + VERSION_GREATER 12.0) + file(TO_NATIVE_PATH ${PADDLE_SOURCE_DIR}/patches/gloo/device.cc.patch + native_dst) + set(GLOO_PATCH_COMMAND patch -d ${GLOO_SOURCE_DIR}/gloo/transport/tcp < + ${native_dst}) + endif() +endif() + include_directories(${GLOO_INCLUDE_DIR}) if(WITH_ASCEND OR WITH_ASCEND_CL) @@ -59,6 +70,7 @@ else() GIT_TAG ${GLOO_TAG} PREFIX "${GLOO_PREFIX_DIR}" UPDATE_COMMAND "" + PATCH_COMMAND ${GLOO_PATCH_COMMAND} CONFIGURE_COMMAND "" BUILD_COMMAND mkdir -p ${GLOO_SOURCE_DIR}/build && cd ${GLOO_SOURCE_DIR}/build && cmake diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index e3c774d29d4..e774aff8b02 100755 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -250,6 +250,12 @@ function(build_protobuf TARGET_NAME BUILD_FOR_HOST) else() set(PROTOBUF_REPOSITORY ${GIT_URL}/protocolbuffers/protobuf.git) set(PROTOBUF_TAG 9f75c5aa851cd877fb0d93ccc31b8567a6706546) + if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0 + AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 12.0) + set(PROTOBUF_TAG 2dc747c574b68a808ea4699d26942c8132fe2b09) + endif() + endif() endif() if(WITH_ARM_BRPC) set(ARM_PROTOBUF_URL @@ -322,6 +328,12 @@ elseif(WITH_ARM_BRPC) set(PROTOBUF_VERSION 3.7.1-baidu-ee-common) else() set(PROTOBUF_VERSION 3.1.0) + if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0 + AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 12.0) + set(PROTOBUF_VERSION 3.16.0) + endif() + endif() endif() if(NOT PROTOBUF_FOUND) diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index aa8ab62d7ae..7f8da7fbe50 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -25,6 +25,19 @@ set(WARPCTC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/warpctc) set(WARPCTC_REPOSITORY ${GIT_URL}/baidu-research/warp-ctc.git) set(WARPCTC_TAG bdc2b4550453e0ef2d3b5190f9c6103a84eff184) +set(WARPCTC_SOURCE_DIR ${THIRD_PARTY_PATH}/warpctc/src/extern_warpctc) +set(WARPCTC_PATCH_COMMAND "") +set(WARPCTC_CCBIN_OPTION "") +if(NOT WIN32 AND WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0 AND ${CMAKE_CXX_COMPILER_VERSION} + VERSION_GREATER 12.0) + file(TO_NATIVE_PATH + ${PADDLE_SOURCE_DIR}/patches/warpctc/CMakeLists.txt.patch native_src) + set(WARPCTC_PATCH_COMMAND patch -d ${WARPCTC_SOURCE_DIR} < ${native_src}) + set(WARPCTC_CCBIN_OPTION -DCCBIN_COMPILER=${CCBIN_COMPILER}) + endif() +endif() + set(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include" CACHE PATH "Warp-ctc Directory" FORCE) @@ -112,7 +125,7 @@ else() GIT_TAG ${WARPCTC_TAG} PREFIX ${WARPCTC_PREFIX_DIR} UPDATE_COMMAND "" - PATCH_COMMAND "" + PATCH_COMMAND ${WARPCTC_PATCH_COMMAND} #BUILD_ALWAYS 1 CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} @@ -132,7 +145,9 @@ else() -DBUILD_TESTS=OFF -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR} ${EXTERNAL_OPTIONAL_ARGS} + ${WARPCTC_CCBIN_OPTION} CMAKE_CACHE_ARGS -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON diff --git a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h index 4d53ff8a739..655e3c2a5cf 100644 --- a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h +++ b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h @@ -62,7 +62,7 @@ inline std::vector shuffle_int_vector(int n) { e = i++; } std::shuffle(ret.begin(), ret.end(), random_engine_wrapper.engine); - return std::move(ret); + return ret; } #define CUDA_CHECK(cmd) \ diff --git a/paddle/fluid/operators/jit/CMakeLists.txt b/paddle/fluid/operators/jit/CMakeLists.txt index 8aa7eea6708..68a5e7d22dd 100644 --- a/paddle/fluid/operators/jit/CMakeLists.txt +++ b/paddle/fluid/operators/jit/CMakeLists.txt @@ -39,14 +39,23 @@ cc_test( SRCS test.cc DEPS jit_kernel_helper) if(NOT WIN32) - cc_binary( - jit_kernel_benchmark - SRCS - benchmark.cc - DEPS - jit_kernel_helper - device_tracer - tensor) + set(cuda_less12_and_gcc_greater12 false) + if(DEFINED CMAKE_CUDA_COMPILER_VERSION) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0 + AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 12.0) + set(cuda_less12_and_gcc_greater12 true) + endif() + endif() + if(NOT cuda_less12_and_gcc_greater12) + cc_binary( + jit_kernel_benchmark + SRCS + benchmark.cc + DEPS + jit_kernel_helper + device_tracer + tensor) + endif() endif() if(WITH_TESTING AND TEST jit_kernel_test) set_tests_properties(jit_kernel_test PROPERTIES TIMEOUT 120) diff --git a/paddle/phi/tools/CMakeLists.txt b/paddle/phi/tools/CMakeLists.txt index 9c2492cd4ef..bf66522862c 100644 --- a/paddle/phi/tools/CMakeLists.txt +++ b/paddle/phi/tools/CMakeLists.txt @@ -1,3 +1,10 @@ +if(WITH_GPU) + if(${CMAKE_CUDA_COMPILER_VERSION} LESS 12.0 AND ${CMAKE_CXX_COMPILER_VERSION} + VERSION_GREATER 12.0) + return() + endif() +endif() + add_executable(print_phi_kernels print_phi_kernels.cc) target_link_libraries(print_phi_kernels phi phi_api_utils) if(WIN32) diff --git a/patches/gloo/device.cc.patch b/patches/gloo/device.cc.patch new file mode 100644 index 00000000000..10769b3e0ca --- /dev/null +++ b/patches/gloo/device.cc.patch @@ -0,0 +1,10 @@ +--- a/device.cc ++++ b/device.cc +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + + #include "gloo/common/linux.h" + #include "gloo/common/logging.h" diff --git a/patches/warpctc/CMakeLists.txt.patch b/patches/warpctc/CMakeLists.txt.patch new file mode 100644 index 00000000000..9605d8b2d31 --- /dev/null +++ b/patches/warpctc/CMakeLists.txt.patch @@ -0,0 +1,13 @@ +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -23,6 +23,10 @@ + option(BUILD_SHARED "build warp-ctc shared library." ON) + option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF) + ++set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler") ++set(NVCC_PREPEND_FLAGS "${NVCC_PREPEND_FLAGS} -ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler") ++set(CCBIN "-ccbin ${CCBIN_COMPILER} -allow-unsupported-compiler") ++ + if(WITH_ROCM) + add_definitions(-DWARPCTC_WITH_HIP) + include(hip) -- GitLab