未验证 提交 210fa777 编写于 作者: X xiaoxiaohehe001 提交者: GitHub

nvcclazylinux (#44957)

上级 30b66f03
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License # limitations under the License
# Pick the minimum CMake version (and the policy set) for this platform.
if(APPLE AND WITH_ARM)
  # CMake starts to support Apple M1 with version 3.19.2.
  cmake_minimum_required(VERSION 3.19.2)
  cmake_policy(VERSION 3.19.2)
else()
  # Every other platform requires 3.15 and pins policies to the 3.10 set.
  cmake_minimum_required(VERSION 3.15)
  cmake_policy(VERSION 3.10)
endif()
# Let include()/find_package() locate the modules shipped in <source>/cmake.
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
# Remember the top-level source directory under a project-specific name.
set(PADDLE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
...@@ -28,9 +28,12 @@ include(system) ...@@ -28,9 +28,12 @@ include(system)
# Fall back to a Release build when no build type was requested.
# Note(zhouwei): Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING
      "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
      FORCE)
endif()

# Declare the project; only the C++ and C toolchains are enabled here.
project(paddle CXX C)
...@@ -39,152 +42,181 @@ project(paddle CXX C) ...@@ -39,152 +42,181 @@ project(paddle CXX C)
# Probe for optional toolkits; QUIET because absence is not an error here.
# TODO(Shibo Tao): remove find_package(CUDA) completely.
find_package(CUDA QUIET)
find_package(MKL CONFIG QUIET)

# ---- Hardware / backend switches ------------------------------------------
option(WITH_ONEMKL "Compile PaddlePaddle with oneMKL" OFF)
# WITH_GPU defaults to whatever find_package(CUDA) reported above.
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
option(WITH_MLU "Compile PaddlePaddle with CAMBRICON MLU" OFF)
option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)

# NOTE(zhiqiu): WITH_ASCEND_CL can be compiled on x86_64, so WITH_ASCEND=OFF
# together with WITH_ASCEND_CL=ON allows developing ACL-related functionality
# on x86. Its default follows WITH_ASCEND, so it must be declared after it.
option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" ${WITH_ASCEND})
option(WITH_ASCEND_CXX11 "Compile PaddlePaddle with ASCEND and CXX11 ABI" OFF)
option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" OFF)
# Note(zhouwei): these modules read the options declared above, so they have
# to be included after them.
include(init)
include(generic)      # simplify cmake module
include(experimental) # experimental build options
# The CUDA backend cannot be combined with any other accelerator backend.
# The first conflicting option found aborts configuration.
if(WITH_GPU)
  if(WITH_XPU)
    message(FATAL_ERROR "Error when compile GPU and XPU at the same time")
  endif()
  if(WITH_XPU_KP)
    message(FATAL_ERROR "Error when compile GPU and XPU2 at the same time")
  endif()
  if(WITH_ASCEND)
    message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time")
  endif()
  if(WITH_ROCM)
    message(FATAL_ERROR "Error when compile CUDA and ROCM at the same time")
  endif()
  if(WITH_MLU)
    message(FATAL_ERROR "Error when compile GPU and MLU at the same time")
  endif()
endif()
# Enable the CUDA language for GPU builds (never on Apple) and report it.
if(WITH_GPU AND NOT APPLE)
  enable_language(CUDA)
  message(
    STATUS
      "CUDA compiler: ${CMAKE_CUDA_COMPILER}, version: ${CMAKE_CUDA_COMPILER_ID} ${CMAKE_CUDA_COMPILER_VERSION}"
  )
endif()

# Report the host toolchain that was picked up by project().
message(
  STATUS
    "CXX compiler: ${CMAKE_CXX_COMPILER}, version: ${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}"
)
message(
  STATUS
    "C compiler: ${CMAKE_C_COMPILER}, version: ${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}"
)
message(STATUS "AR tools: ${CMAKE_AR}")
# MUSL builds: stop a handful of warnings from being treated as errors.
if(WITH_MUSL)
  string(
    APPEND
    CMAKE_CXX_FLAGS
    " -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
  )
endif()
# Apple Silicon: tell the compilers to target arm64-apple-darwin.
if(APPLE AND WITH_ARM)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -target arm64-apple-darwin")
  # Extend the existing C++ flags. The previous code rebuilt CMAKE_CXX_FLAGS
  # from CMAKE_C_FLAGS, which threw away every C++ flag set so far (for
  # example the WITH_MUSL warning flags) and passed -target twice.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -target arm64-apple-darwin")
endif()
# Ascend CL builds use the pre-C++11 libstdc++ ABI unless WITH_ASCEND_CXX11
# asks for the new one.
if(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11)
  string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=0")
endif()
if(WIN32)
  # ---- Windows (MSVC-style command line) flag setup ------------------------
  option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
  set(CMAKE_SUPPRESS_REGENERATION ON)
  set(CMAKE_STATIC_LIBRARY_PREFIX lib)

  # Add /bigobj to the Debug and Release flags of both languages.
  foreach(cfg_flags CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
                    CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE)
    string(APPEND ${cfg_flags} " /bigobj")
  endforeach()

  # The Ninja generator additionally gets /Zc:inline on the same variables.
  if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
    foreach(cfg_flags CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
                      CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE)
      string(APPEND ${cfg_flags} " /Zc:inline")
    endforeach()
  endif()

  # Static CRT: rewrite every /MD occurrence (this also covers /MDd) to /MT.
  if(MSVC_STATIC_CRT)
    message(
      STATUS
        "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019"
    )
    foreach(
      crt_flags
      CMAKE_CXX_FLAGS
      CMAKE_CXX_FLAGS_DEBUG
      CMAKE_CXX_FLAGS_RELEASE
      CMAKE_CXX_FLAGS_MINSIZEREL
      CMAKE_CXX_FLAGS_RELWITHDEBINFO
      CMAKE_C_FLAGS
      CMAKE_C_FLAGS_DEBUG
      CMAKE_C_FLAGS_RELEASE
      CMAKE_C_FLAGS_MINSIZEREL
      CMAKE_C_FLAGS_RELWITHDEBINFO)
      if(${crt_flags} MATCHES "/MD")
        string(REGEX REPLACE "/MD" "/MT" ${crt_flags} "${${crt_flags}}")
      endif()
    endforeach()
  endif()

  # NOTE(zhouwei): msvc max/min macro conflict with std::min/max, define NOMINMAX globally
  add_definitions("-DNOMINMAX")

  # windows build turn off warnings, use parallel compiling.
  foreach(
    warn_flags
    CMAKE_CXX_FLAGS
    CMAKE_CXX_FLAGS_DEBUG
    CMAKE_CXX_FLAGS_RELEASE
    CMAKE_CXX_FLAGS_MINSIZEREL
    CMAKE_CXX_FLAGS_RELWITHDEBINFO
    CMAKE_C_FLAGS
    CMAKE_C_FLAGS_DEBUG
    CMAKE_C_FLAGS_RELEASE
    CMAKE_C_FLAGS_MINSIZEREL
    CMAKE_C_FLAGS_RELWITHDEBINFO)
    # Downgrade any explicit /W1../W4 warning level to /W0.
    string(REGEX REPLACE "/W[1-4]" " /W0 " ${warn_flags} "${${warn_flags}}")
    # NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling,
    # For Visual Studio generators, /MP should be added.
    # For other generators like Ninja, it is not need to add /MP.
    if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT WITH_GPU)
      # Use two thirds of CPU_CORES (defined outside this block) as /MP count.
      math(EXPR PROCESS_MAX "${CPU_CORES} * 2 / 3")
      string(APPEND ${warn_flags} " /MP${PROCESS_MAX}")
    endif()
  endforeach()

  # Append /w to the base flags of both languages.
  foreach(base_flags CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
    string(APPEND ${base_flags} " /w")
  endforeach()

  # Windows Remove /Zi, /ZI for Release, MinSizeRel builds
  foreach(
    dbginfo_flags
    CMAKE_C_FLAGS
    CMAKE_C_FLAGS_RELEASE
    CMAKE_C_FLAGS_MINSIZEREL
    CMAKE_CXX_FLAGS
    CMAKE_CXX_FLAGS_RELEASE
    CMAKE_CXX_FLAGS_MINSIZEREL)
    if(${dbginfo_flags} MATCHES "/Z[iI]")
      string(REGEX REPLACE "/Z[iI]" "" ${dbginfo_flags}
                           "${${dbginfo_flags}}")
    endif()
  endforeach()

  # Disable a fixed list of individual compiler warnings for C and C++.
  string(
    APPEND
    CMAKE_C_FLAGS
    " /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
  )
  string(
    APPEND
    CMAKE_CXX_FLAGS
    " /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
  )

  # Silence selected linker warnings; with the static CRT also keep
  # MSVCRT.LIB out of the default libraries.
  foreach(link_flags CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
                     CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS)
    string(APPEND ${link_flags}
           " /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221")
    if(MSVC_STATIC_CRT)
      string(APPEND ${link_flags} " /NODEFAULTLIB:MSVCRT.LIB")
    endif()
  endforeach()

  # Core-dump debug mode: put /Zi back (it was stripped above) and ask the
  # linker for debug information.
  if(WITH_WIN_DUMP_DBG)
    string(APPEND CMAKE_C_FLAGS " /Zi")
    string(APPEND CMAKE_CXX_FLAGS " /Zi")
    foreach(link_flags CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
                       CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS)
      string(APPEND ${link_flags} " /DEBUG /OPT:REF /OPT:ICF")
    endforeach()
    add_definitions("-DWITH_WIN_DUMP_DBG")
  endif()
else()
  # Non-Windows: deprecated-declaration warnings are neither errors nor shown.
  string(
    APPEND CMAKE_CXX_FLAGS
    " -Wno-error=deprecated-declarations -Wno-deprecated-declarations")
endif()
# Git is mandatory; configuration fails if it cannot be found.
find_package(Git REQUIRED)
...@@ -192,7 +224,7 @@ find_package(Git REQUIRED) ...@@ -192,7 +224,7 @@ find_package(Git REQUIRED)
# config GIT_URL with github mirrors to speed up dependent repos clone
#
# GIT_URL holds a URL, so it is declared as a STRING cache entry. option()
# only models booleans and would record an unset GIT_URL as OFF. A value
# passed with -DGIT_URL=... is kept because set(... CACHE) without FORCE
# never overwrites an existing entry.
set(GIT_URL
    ""
    CACHE STRING "Git URL to clone dependent repos")
if(NOT GIT_URL)
  # Nothing supplied (or an old cache still holds OFF): use GitHub.
  set(GIT_URL "https://github.com")
endif()

# The platform thread library is mandatory.
find_package(Threads REQUIRED)
...@@ -200,58 +232,75 @@ find_package(Threads REQUIRED) ...@@ -200,58 +232,75 @@ find_package(Threads REQUIRED)
# simd is included before the options below because their defaults read
# AVX_FOUND.
include(simd)

################################ Exposed Configurations #######################################
option(WITH_AVX "Compile PaddlePaddle with AVX intrinsics" ${AVX_FOUND})
option(WITH_PYTHON "Compile PaddlePaddle with python interpreter" ON)
option(WITH_TESTING "Compile PaddlePaddle with unit testing" OFF)
option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FOUND})
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation"
       OFF)

################################ Internal Configurations #######################################
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
       OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(WITH_INCREMENTAL_COVERAGE
       "Generate coverage reports only for incremental code" OFF)
option(WITH_LIBXSMM "Compile with libxsmm" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(WITH_PSLIB "Compile with pslib support" OFF)
option(WITH_BOX_PS "Compile with box_ps support" OFF)
option(WITH_XBYAK "Compile with xbyak support" ON)
option(WITH_CONTRIB "Compile the third-party contributation" OFF)
# The defaults of WITH_PSCORE and WITH_DGC follow WITH_DISTRIBUTE.
option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
# The default used to be spelled `OFF}`; the stray brace is removed so the
# default is the literal OFF.
option(WITH_HETERPS "Compile with heterps" OFF)
option(WITH_INFERENCE_API_TEST
       "Test fluid inference C++ high-level api interface" OFF)
# NOTE(review): PY_VERSION and SANITIZER_TYPE carry string values although
# option() is boolean-only; they are only meaningful when given via -D.
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
option(
  SANITIZER_TYPE
  "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined"
  OFF)
option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
option(WITH_CINN "Compile PaddlePaddle with CINN" OFF)
option(WITH_INFRT "Compile PaddlePaddle with INFRT" OFF)
option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON)
option(WITH_RCCL "Compile PaddlePaddle with RCCL support" ON)
option(WITH_XPU_BKCL "Compile PaddlePaddle with BAIDU KUNLUN XPU BKCL" OFF)
option(WITH_CNCL "Compile PaddlePaddle with CNCL support" OFF)
option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON)
option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
option(WITH_SW "Compile PaddlePaddle with sw support" OFF)
option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
option(WITH_MUSL "Compile with musl libc instead of gblic" OFF)
option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
option(WITH_STRIP "Strip so files of Whl packages" OFF)
option(NEW_RELEASE_PYPI
       "PaddlePaddle next-level release strategy for pypi cubin package" OFF)
option(NEW_RELEASE_ALL
       "PaddlePaddle next-level release strategy for all arches cubin package"
       OFF)
option(NEW_RELEASE_JIT
       "PaddlePaddle next-level release strategy for backup jit package" OFF)
option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF)
option(WITH_POCKETFFT "Compile with pocketfft support" ON)
option(WITH_RECORD_BUILDTIME
       "Compile PaddlePaddle with record all targets build time" OFF)
option(WITH_CUSTOM_DEVICE "Compile with custom device support" OFF)
# Either use ccache or time every compile/link step -- never both.
if(NOT WITH_RECORD_BUILDTIME)
  # set ccache for compilation; it can't be used with WITH_RECORD_BUILDTIME=ON
  include(ccache)
else()
  # Wrap each compile and link command in the build-time recording script.
  set_property(
    GLOBAL PROPERTY RULE_LAUNCH_COMPILE
                    "${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh")
  set_property(
    GLOBAL PROPERTY RULE_LAUNCH_LINK
                    "${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh")
endif()
# Drop the switch from the cache so it is not remembered by later configures.
unset(WITH_RECORD_BUILDTIME CACHE)
...@@ -261,186 +310,224 @@ if(NOT PY_VERSION) ...@@ -261,186 +310,224 @@ if(NOT PY_VERSION)
endif() endif()
# Expose the selected Python version under the name PYBIND11_PYTHON_VERSION.
set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
# the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF
#
# An unsupported value is a configuration error. The previous code printed a
# plain message and called return() at top level, which silently skipped the
# rest of this file and left a build tree without targets. FATAL_ERROR stops
# configuration visibly instead.
if(SANITIZER_TYPE AND NOT "${SANITIZER_TYPE}" MATCHES
                      "^(Address|Leak|Memory|Thread|Undefined)$")
  message(
    FATAL_ERROR
      "Choose the correct type of sanitizer (got SANITIZER_TYPE=${SANITIZER_TYPE})"
  )
endif()
# On Linux, custom-device support is switched on automatically unless it is
# already on or this is an inference (ON_INFER) build.
if(LINUX)
  if(NOT WITH_CUSTOM_DEVICE AND NOT ON_INFER)
    set(WITH_CUSTOM_DEVICE ON)
  endif()
endif()
# Windows supports neither the distributed build nor NCCL: force both off.
if(WIN32 AND WITH_DISTRIBUTE)
  message(
    WARNING
      "Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF."
  )
  set(WITH_DISTRIBUTE
      OFF
      CACHE STRING "Disable DISTRIBUTE when compiling for Windows" FORCE)
endif()
if(WIN32 AND WITH_NCCL)
  message(
    WARNING "Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF.")
  set(WITH_NCCL
      OFF
      CACHE STRING "Disable NCCL when compiling for Windows" FORCE)
endif()

# NCCL is pointless without the GPU backend.
if(WITH_NCCL AND NOT WITH_GPU)
  message(
    WARNING "Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF.")
  set(WITH_NCCL
      OFF
      CACHE STRING "Disable NCCL when compiling without GPU" FORCE)
endif()
# WITH_XPU_KP implies WITH_XPU: switch WITH_XPU on if the user left it off.
if(WITH_XPU_KP)
  if(NOT WITH_XPU)
    message(
      WARNING
        "Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON.")
    set(WITH_XPU
        ON
        CACHE STRING "Enable WITH_XPU when compiling with WITH_XPU_KP" FORCE)
  endif()
endif()
# BKCL needs the XPU backend; force it off when XPU is not enabled.
if(WITH_XPU_BKCL AND NOT WITH_XPU)
  message(
    WARNING "Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF.")
  set(WITH_XPU_BKCL
      OFF
      CACHE STRING "Disable BKCL when compiling without XPU" FORCE)
endif()
# CNCL needs the MLU backend; force it off when MLU is not enabled.
# The previous code forced WITH_MLU (already off in this branch) instead of
# WITH_CNCL, so CNCL stayed enabled, and the warning named the wrong option.
if(NOT WITH_MLU AND WITH_CNCL)
  message(
    WARNING "Disable CNCL when compiling without MLU. Force WITH_CNCL=OFF.")
  set(WITH_CNCL
      OFF
      CACHE BOOL "Disable CNCL when compiling without MLU" FORCE)
endif()
# With NCCL: define PADDLE_WITH_NCCL and pull in the nccl module.
# Without it, GPU builds get a reminder that only one card can be used.
if(WITH_NCCL)
  add_definitions("-DPADDLE_WITH_NCCL")
  include(nccl)
elseif(WITH_GPU)
  message(
    WARNING
      "If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used."
  )
endif()
# brpc over RDMA is only valid in a distributed build.
if(WITH_BRPC_RDMA)
  message(STATUS "Use brpc with rdma.")
  if(NOT WITH_DISTRIBUTE)
    message(FATAL_ERROR "Can't use brpc rdma in no distribute env.")
  endif()
endif()
# Modules needed by the CUDA backend.
if(WITH_GPU)
  include(cuda)
  # lite subgraph compilation depends on CUDNN_ROOT,
  # so include(cudnn) needs to be in front of include(third_party/lite)
  include(cudnn) # set cudnn libraries, must before configure
  include(tensorrt)
  # there is no official support of nccl, cupti in windows
  if(NOT WIN32)
    include(cupti)
  endif()
endif()
# Modules for the remaining accelerator backends, one per enabled option.
if(WITH_MLU)
  include(neuware)
endif()

if(WITH_ROCM)
  include(hip)
  include(miopen) # set miopen libraries, must before configure
endif()

if(WITH_XPU_KP)
  include(xpu_kp)
endif()
# RCCL needs the ROCM backend; force it off when ROCM is not enabled.
if(WITH_RCCL AND NOT WITH_ROCM)
  message(
    WARNING "Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.")
  set(WITH_RCCL
      OFF
      CACHE STRING "Disable RCCL when compiling without ROCM" FORCE)
endif()
# With RCCL: define PADDLE_WITH_RCCL and pull in the rccl module.
# Without it, ROCM builds get a reminder that only one card can be used.
if(WITH_RCCL)
  add_definitions("-DPADDLE_WITH_RCCL")
  include(rccl)
elseif(WITH_ROCM)
  message(
    WARNING
      "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used."
  )
endif()
# HETERPS combined with PSLIB is built with the pre-C++11 libstdc++ ABI.
if(WITH_HETERPS AND WITH_PSLIB)
  string(APPEND CMAKE_CXX_FLAGS " -D_GLIBCXX_USE_CXX11_ABI=0")
endif()
# Distributed builds: GLOO is forced on under Linux.
if(WITH_DISTRIBUTE AND LINUX)
  set(WITH_GLOO
      ON
      CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
endif()
# disable WITH_PSCORE for NPU before include third_party
if(WITH_DISTRIBUTE AND WITH_ASCEND_CL)
  message(
    WARNING
      "Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF.")
  set(WITH_PSCORE
      OFF
      CACHE BOOL "Disable WITH_PSCORE when compiling with NPU" FORCE)
endif()

# download, build, install third_party, Contains about 20+ dependencies
include(third_party)

# set paddle compile flags
include(flags)
# Profiling builds require gperftools: add its headers and define
# WITH_GPERFTOOLS for every target.
if(WITH_PROFILER)
  find_package(Gperftools REQUIRED)
  include_directories(${GPERFTOOLS_INCLUDE_DIR})
  add_definitions(-DWITH_GPERFTOOLS)
endif()
# Remaining helper modules, in the order they are needed.
include(util)      # set unittest and link libs
include(version)   # set PADDLE_VERSION
include(coveralls) # set code coverage
include(configure) # add paddle env configuration

# Make headers addressable relative to the source root.
include_directories("${PADDLE_SOURCE_DIR}")
# A Jetson build always enables the ARM configuration.
if(WITH_NV_JETSON)
  set(WITH_ARM
      ON
      CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE)
endif()
# ARM: build position-independent code and switch off XBYAK, MKL and AVX.
if(WITH_ARM)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
  set(WITH_XBYAK
      OFF
      CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON." FORCE)
  set(WITH_MKL
      OFF
      CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE)
  # The help text used to read "WITH_AVX=OFF", which named the effect rather
  # than the cause; it now matches the two entries above.
  set(WITH_AVX
      OFF
      CACHE STRING "Disable AVX when compiling WITH_ARM=ON." FORCE)
  add_definitions(-DPADDLE_WITH_ARM)
endif()

# SW: position-independent code plus -mieee; XBYAK and MKL are switched off.
if(WITH_SW)
  # mieee flag solves floating-point exceptions under sw and ALPHA architectures
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -mieee")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -mieee")
  set(WITH_XBYAK
      OFF
      CACHE STRING "Disable XBYAK when compiling WITH_SW=ON" FORCE)
  set(WITH_MKL
      OFF
      CACHE STRING "Disable MKL when compiling WITH_SW=ON." FORCE)
  add_definitions(-DPADDLE_WITH_SW)
endif()

# MIPS: XBYAK is switched off.
if(WITH_MIPS)
  set(WITH_XBYAK
      OFF
      CACHE STRING "Disable XBYAK when compiling WITH_MIPS=ON" FORCE)
  add_definitions(-DPADDLE_WITH_MIPS)
endif()

# oneMKL only adds a compile definition here.
if(WITH_ONEMKL)
  add_definitions(-DPADDLE_WITH_ONEMKL)
endif()

# HETERPS: add -faligned-new when the C++ compiler version is above 7.0.
if(WITH_HETERPS)
  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new")
  endif()
endif()
# Location of the Python build directory inside the binary tree.
set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
...@@ -450,25 +537,32 @@ set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") ...@@ -450,25 +537,32 @@ set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
# Define PADDLE_DLL_EXPORT for every target.
add_definitions(-DPADDLE_DLL_EXPORT)

# Inference mode: ON_INFER=ON pulls in the inference_lib module and defines
# PADDLE_ON_INFERENCE; ON_INFER=OFF only emits a warning.
if(NOT ON_INFER)
  #TODO(luotao), combine this warning with `make inference_lib_dist` command.
  message(
    WARNING
      "On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only."
  )
else()
  # you can turn off the paddle fluid and inference lib by setting ON_INFER=OFF
  message(
    STATUS "On inference mode, will take place some specific optimization.")
  include(inference_lib)
  add_definitions(-DPADDLE_ON_INFERENCE)
endif()
if(WITH_STRIP) if(WITH_STRIP)
find_program(STRIP_PATH strip) find_program(STRIP_PATH strip)
if(NOT STRIP_PATH OR NOT LINUX) if(NOT STRIP_PATH OR NOT LINUX)
set(WITH_STRIP OFF CACHE STRING "Command strip is only used on Linux when it exists." FORCE) set(WITH_STRIP
endif() OFF
CACHE STRING "Command strip is only used on Linux when it exists."
FORCE)
endif()
endif() endif()
add_subdirectory(paddle) add_subdirectory(paddle)
if(WITH_PYTHON) if(WITH_PYTHON)
add_subdirectory(python) add_subdirectory(python)
endif() endif()
get_directory_property(all_inc_dirs INCLUDE_DIRECTORIES) get_directory_property(all_inc_dirs INCLUDE_DIRECTORIES)
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is the entry point for experimental build options.
# It currently pulls in a single experiment: lazy CUDA module loading
# (the experiments/cuda_module_loading_lazy module, looked up through
# CMAKE_MODULE_PATH).
include(experiments/cuda_module_loading_lazy)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file contains the experimental build option for lazy CUDA module loading.
# Lazy CUDA module loading is supported natively by CUDA 11.7+;
# this experimental option makes Paddle support lazy loading with CUDA versions before 11.7.
# Lazy CUDA module loading experiment (Linux only).
#
# When ON_INFER is enabled, CUDA was found and the CUDA version is older than
# 11.7, this block:
#   1. forces the shared CUDA runtime (libcudart.so),
#   2. generates the wrapper script tools/nvcc_lazy by running
#      tools/nvcc_lazy.sh, and
#   3. points both CUDA_NVCC_EXECUTABLE and CMAKE_CUDA_COMPILER at that wrapper.
# In every other case it prints a message and returns from this file early.
if(LINUX)
  # Variables are referenced by name (not as unquoted ${...}) so that an
  # undefined or empty value evaluates to false instead of producing a
  # malformed if() expression.
  if(NOT ON_INFER)
    message(
      "EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms"
    )
    return()
  endif()
  if(NOT CUDA_FOUND)
    message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
    return()
  endif()
  if("${CUDA_VERSION}" VERSION_GREATER_EQUAL "11.7")
    message("cuda 11.7+ already support lazy module loading")
    return()
  endif()

  message(
    "for cuda before 11.7, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a"
  )
  set(CUDA_USE_STATIC_CUDA_RUNTIME
      OFF
      CACHE BOOL "" FORCE)
  # NOTE: this replaces any CMAKE_CUDA_FLAGS value that was set before.
  set(CMAKE_CUDA_FLAGS "--cudart shared")
  enable_language(CUDA)

  # Regenerate the nvcc wrapper from scratch. Each step is a separate call:
  # several COMMANDs inside one execute_process() run concurrently as a
  # pipeline, which would let the removal race with the generator.
  file(REMOVE_RECURSE "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")
  execute_process(COMMAND "chmod" "755"
                          "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh")
  execute_process(
    COMMAND "bash" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh"
            "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" "${CUDA_TOOLKIT_ROOT_DIR}"
    RESULT_VARIABLE nvcc_lazy_gen_result)
  if(NOT nvcc_lazy_gen_result EQUAL 0)
    # Without the wrapper the compiler paths set below would be invalid, so
    # stop here with a clear message.
    message(
      FATAL_ERROR
        "failed to generate ${CMAKE_SOURCE_DIR}/tools/nvcc_lazy (result: ${nvcc_lazy_gen_result})"
    )
  endif()
  unset(nvcc_lazy_gen_result)
  execute_process(COMMAND "chmod" "755" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")

  # Route both the legacy FindCUDA variable and the native CUDA language
  # compiler through the generated wrapper.
  set(CUDA_NVCC_EXECUTABLE
      "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
      CACHE FILEPATH "" FORCE)
  set(CMAKE_CUDA_COMPILER
      "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
      CACHE FILEPATH "" FORCE)
endif()
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generates the nvcc wrapper script used for the lazy CUDA module loading
# trick.
#
# Usage: nvcc_lazy.sh <output-script> <cuda-toolkit-root>
#   $1  path of the wrapper script to create
#   $2  CUDA toolkit root; embedded into the wrapper as "$2/nvvm/bin" and
#       "$2/bin/nvcc"
#
# The wrapper is emitted through one truncating redirection to "$1", so the
# output path may contain spaces and re-running the generator replaces an
# existing wrapper instead of appending a second copy to it.
if [ "$#" -lt 2 ]; then
  echo "usage: $0 <output-script> <cuda-toolkit-root>" >&2
  exit 1
fi

{
  # Wrapper prologue and license header.
  echo "#!/usr/bin/env bash"
  echo "unset GREP_OPTIONS"
  echo "set -e"
  echo
  echo "# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved."
  echo "#"
  echo "# Licensed under the Apache License, Version 2.0 (the \"License\");"
  echo "# you may not use this file except in compliance with the License."
  echo "# You may obtain a copy of the License at"
  echo "#"
  echo "# http://www.apache.org/licenses/LICENSE-2.0"
  echo "#"
  echo "# Unless required by applicable law or agreed to in writing, software"
  echo "# distributed under the License is distributed on an \"AS IS\" BASIS,"
  echo "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied."
  echo "# See the License for the specific language governing permissions and"
  echo "# limitations under the License."
  echo
  echo
  echo "## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY"
  echo
  echo "# set cicc PATH for Centos"
  echo "export PATH=\$PATH:$2/nvvm/bin"
  echo
  # For nvcc 11.7 or newer the wrapper just forwards its arguments to nvcc.
  echo "# check nvcc version, if nvcc >= 11.7, just run nvcc itself"
  echo "CUDA_VERSION=\$(nvcc --version | grep -oP '(?<=V)\d*\.\d*')"
  echo "CUDA_VERSION_MAJOR=\${CUDA_VERSION%.*}"
  echo "CUDA_VERSION_MINOR=\${CUDA_VERSION#*.}"
  echo "if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 7) )); then"
  echo " nvcc \"\$@\""
  echo " exit"
  echo "fi"
  echo
  # Otherwise: capture the nvcc --dryrun command list into build.sh and split
  # it into the part up to the cicc step (.pre) and the part after it (.post).
  echo "BUILDDIR=\$(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX)"
  echo "echo \"\$@\" > \${BUILDDIR}/args"
  echo "BUILDSH=\${BUILDDIR}/build.sh"
  echo "$2/bin/nvcc --dryrun --keep --keep-dir=\${BUILDDIR} \"\$@\" 2>&1 | sed -e 's/#\\$ //;/^rm/d' > \$BUILDSH"
  echo "sed -i -e '/^\s*--/d' \$BUILDSH"
  echo "sed -ne '1,/^cicc.*cudafe1.stub.c/p' \${BUILDSH} > \${BUILDSH}.pre"
  echo "sed -e '1,/^cicc.*cudafe1.stub.c/d' \${BUILDSH} > \${BUILDSH}.post"
  echo
  echo "sed -i -e '/LIBRARIES=/{s/\s//g;s/\"\"/ /g}' \${BUILDSH}.pre"
  echo
  echo "/usr/bin/env bash \${BUILDSH}.pre"
  # The -name pattern is quoted in the wrapper so that its shell cannot expand
  # the glob against files in the current directory before find sees it.
  echo "STUBF=\$(find \$BUILDDIR -name '*.cudafe1.stub.c')"
  echo "CUFILE=\$(basename -s '.cudafe1.stub.c' \$STUBF)"
  # Patch the generated stub: the original __sti____cudaRegisterAll body is
  # renamed to __do____cudaRegisterAll and called through
  # __try____cudaRegisterAll, which returns early when CUDA_MODULE_LOADING
  # starts with "LAZY" and the call does not come from a device stub (l != 1).
  echo "sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);' \$STUBF"
  echo "sed -i -e 's/__sti____cudaRegisterAll\(.*{\)/__do____cudaRegisterAll\1/' \$STUBF"
  # Debug variant (emitted as a comment in the wrapper): same patch plus an
  # fprintf to stderr.
  echo "# sed -i -e \"/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\\\"CUDA_MODULE_LOADING\\\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; fprintf(stderr,\\\"===> \${CUFILE} lazy-load? %d\\\\\\\\n\\\", l); __do____cudaRegisterAll();}\" \$STUBF"
  echo "sed -i -e \"/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\\\"CUDA_MODULE_LOADING\\\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}\" \$STUBF"
  echo "sed -i -e '/__try____cudaRegisterAll\(.*{\)/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}' \$STUBF"
  echo "sed -i -e 's/{\(__device_stub__\)/{__try____cudaRegisterAll(1);\1/' \$STUBF"
  # Run the remaining build steps against the patched stub, then clean up.
  echo "/usr/bin/env bash \${BUILDSH}.post"
  echo "rm -rf \$BUILDDIR"
} > "$1"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册