未验证 提交 210fa777 编写于 作者: X xiaoxiaohehe001 提交者: GitHub

nvcclazylinux (#44957)

上级 30b66f03
...@@ -28,7 +28,10 @@ include(system) ...@@ -28,7 +28,10 @@ include(system)
# Note(zhouwei): Ninja Generator will set CMAKE_BUILD_TYPE to Debug # Note(zhouwei): Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING set(CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel" "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE) FORCE)
endif() endif()
...@@ -57,20 +60,21 @@ option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" ...@@ -57,20 +60,21 @@ option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME"
# Note(zhouwei): It use option above, so put here # Note(zhouwei): It use option above, so put here
include(init) include(init)
include(generic) # simplify cmake module include(generic) # simplify cmake module
include(experimental) # experimental build options
if (WITH_GPU AND WITH_XPU) if(WITH_GPU AND WITH_XPU)
message(FATAL_ERROR "Error when compile GPU and XPU at the same time") message(FATAL_ERROR "Error when compile GPU and XPU at the same time")
endif() endif()
if (WITH_GPU AND WITH_XPU_KP) if(WITH_GPU AND WITH_XPU_KP)
message(FATAL_ERROR "Error when compile GPU and XPU2 at the same time") message(FATAL_ERROR "Error when compile GPU and XPU2 at the same time")
endif() endif()
if (WITH_GPU AND WITH_ASCEND) if(WITH_GPU AND WITH_ASCEND)
message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time") message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time")
endif() endif()
if (WITH_GPU AND WITH_ROCM) if(WITH_GPU AND WITH_ROCM)
message(FATAL_ERROR "Error when compile CUDA and ROCM at the same time") message(FATAL_ERROR "Error when compile CUDA and ROCM at the same time")
endif() endif()
if (WITH_GPU AND WITH_MLU) if(WITH_GPU AND WITH_MLU)
message(FATAL_ERROR "Error when compile GPU and MLU at the same time") message(FATAL_ERROR "Error when compile GPU and MLU at the same time")
endif() endif()
...@@ -88,7 +92,9 @@ message(STATUS "AR tools: ${CMAKE_AR}") ...@@ -88,7 +92,9 @@ message(STATUS "AR tools: ${CMAKE_AR}")
# MUSL build turn off warnings # MUSL build turn off warnings
if(WITH_MUSL) if(WITH_MUSL)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy") set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
)
endif() endif()
if(APPLE AND WITH_ARM) if(APPLE AND WITH_ARM)
...@@ -118,13 +124,23 @@ if(WIN32) ...@@ -118,13 +124,23 @@ if(WIN32)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zc:inline") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zc:inline")
endif() endif()
if (MSVC_STATIC_CRT) if(MSVC_STATIC_CRT)
message(STATUS "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019") message(
foreach(flag_var STATUS
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019"
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO )
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE foreach(
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) flag_var
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO)
if(${flag_var} MATCHES "/MD") if(${flag_var} MATCHES "/MD")
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
endif() endif()
...@@ -134,11 +150,18 @@ if(WIN32) ...@@ -134,11 +150,18 @@ if(WIN32)
# NOTE(zhouwei): msvc max/min macro conflict with std::min/max, define NOMINMAX globally # NOTE(zhouwei): msvc max/min macro conflict with std::min/max, define NOMINMAX globally
add_definitions("-DNOMINMAX") add_definitions("-DNOMINMAX")
# windows build turn off warnings, use parallel compiling. # windows build turn off warnings, use parallel compiling.
foreach(flag_var foreach(
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE flag_var
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO CMAKE_CXX_FLAGS
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO) CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO)
string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}") string(REGEX REPLACE "/W[1-4]" " /W0 " ${flag_var} "${${flag_var}}")
# NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling, # NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling,
...@@ -162,21 +185,28 @@ if(WIN32) ...@@ -162,21 +185,28 @@ if(WIN32)
endif() endif()
endforeach(flag_var) endforeach(flag_var)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838") set(CMAKE_C_FLAGS
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838") "${CMAKE_C_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS) set(CMAKE_CXX_FLAGS
set(${flag_var} "${${flag_var}} /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221") "${CMAKE_CXX_FLAGS} /wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS)
set(${flag_var}
"${${flag_var}} /ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221")
if(MSVC_STATIC_CRT) if(MSVC_STATIC_CRT)
set(${flag_var} "${${flag_var}} /NODEFAULTLIB:MSVCRT.LIB") set(${flag_var} "${${flag_var}} /NODEFAULTLIB:MSVCRT.LIB")
endif() endif()
endforeach(flag_var) endforeach(flag_var)
if (WITH_WIN_DUMP_DBG) if(WITH_WIN_DUMP_DBG)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Zi") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Zi")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Zi")
foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS) foreach(flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS)
set(${flag_var} "${${flag_var}} /DEBUG /OPT:REF /OPT:ICF") set(${flag_var} "${${flag_var}} /DEBUG /OPT:REF /OPT:ICF")
endforeach(flag_var) endforeach(flag_var)
...@@ -184,7 +214,9 @@ if(WIN32) ...@@ -184,7 +214,9 @@ if(WIN32)
endif() endif()
else(WIN32) else(WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations") set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wno-error=deprecated-declarations -Wno-deprecated-declarations"
)
endif(WIN32) endif(WIN32)
find_package(Git REQUIRED) find_package(Git REQUIRED)
...@@ -207,13 +239,16 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO ...@@ -207,13 +239,16 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO
option(WITH_SYSTEM_BLAS "Use system blas library" OFF) option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(WITH_DISTRIBUTE "Compile with distributed support" OFF) option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization and inference-lib generation" OFF) option(ON_INFER "Turn on inference optimization and inference-lib generation"
OFF)
################################ Internal Configurations ####################################### ################################ Internal Configurations #######################################
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF) option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
OFF)
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF) option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)
option(WITH_INCREMENTAL_COVERAGE "Generate coverage reports only for incremental code" OFF) option(WITH_INCREMENTAL_COVERAGE
OPTION(WITH_LIBXSMM "Compile with libxsmm" OFF) "Generate coverage reports only for incremental code" OFF)
option(WITH_LIBXSMM "Compile with libxsmm" OFF)
option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF) option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(WITH_PSLIB "Compile with pslib support" OFF) option(WITH_PSLIB "Compile with pslib support" OFF)
option(WITH_BOX_PS "Compile with box_ps support" OFF) option(WITH_BOX_PS "Compile with box_ps support" OFF)
...@@ -221,10 +256,14 @@ option(WITH_XBYAK "Compile with xbyak support" ON) ...@@ -221,10 +256,14 @@ option(WITH_XBYAK "Compile with xbyak support" ON)
option(WITH_CONTRIB "Compile the third-party contributation" OFF) option(WITH_CONTRIB "Compile the third-party contributation" OFF)
option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE}) option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
option(WITH_HETERPS "Compile with heterps" OFF}) option(WITH_HETERPS "Compile with heterps" OFF})
option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF) option(WITH_INFERENCE_API_TEST
"Test fluid inference C++ high-level api interface" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION}) option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE}) option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
option(SANITIZER_TYPE "Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined" OFF) option(
SANITIZER_TYPE
"Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined"
OFF)
option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF) option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
option(WITH_CINN "Compile PaddlePaddle with CINN" OFF) option(WITH_CINN "Compile PaddlePaddle with CINN" OFF)
option(WITH_INFRT "Compile PaddlePaddle with INFRT" OFF) option(WITH_INFRT "Compile PaddlePaddle with INFRT" OFF)
...@@ -239,19 +278,29 @@ option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF) ...@@ -239,19 +278,29 @@ option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
option(WITH_MUSL "Compile with musl libc instead of gblic" OFF) option(WITH_MUSL "Compile with musl libc instead of gblic" OFF)
option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF) option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
option(WITH_STRIP "Strip so files of Whl packages" OFF) option(WITH_STRIP "Strip so files of Whl packages" OFF)
option(NEW_RELEASE_PYPI "PaddlePaddle next-level release strategy for pypi cubin package" OFF) option(NEW_RELEASE_PYPI
option(NEW_RELEASE_ALL "PaddlePaddle next-level release strategy for all arches cubin package" OFF) "PaddlePaddle next-level release strategy for pypi cubin package" OFF)
option(NEW_RELEASE_JIT "PaddlePaddle next-level release strategy for backup jit package" OFF) option(NEW_RELEASE_ALL
"PaddlePaddle next-level release strategy for all arches cubin package"
OFF)
option(NEW_RELEASE_JIT
"PaddlePaddle next-level release strategy for backup jit package" OFF)
option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF) option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF)
option(WITH_POCKETFFT "Compile with pocketfft support" ON) option(WITH_POCKETFFT "Compile with pocketfft support" ON)
option(WITH_RECORD_BUILDTIME "Compile PaddlePaddle with record all targets build time" OFF) option(WITH_RECORD_BUILDTIME
"Compile PaddlePaddle with record all targets build time" OFF)
option(WITH_CUSTOM_DEVICE "Compile with custom device support" OFF) option(WITH_CUSTOM_DEVICE "Compile with custom device support" OFF)
if(WITH_RECORD_BUILDTIME) if(WITH_RECORD_BUILDTIME)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh") set_property(
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK "${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh") GLOBAL PROPERTY RULE_LAUNCH_COMPILE
"${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh")
set_property(
GLOBAL PROPERTY RULE_LAUNCH_LINK
"${CMAKE_CURRENT_SOURCE_DIR}/tools/get_build_time.sh")
else() else()
include(ccache) # set ccache for compilation ; if WITH_RECORD_BUILDTIME=ON can't use ccache include(ccache
)# set ccache for compilation ; if WITH_RECORD_BUILDTIME=ON can't use ccache
endif() endif()
unset(WITH_RECORD_BUILDTIME CACHE) unset(WITH_RECORD_BUILDTIME CACHE)
...@@ -261,59 +310,70 @@ if(NOT PY_VERSION) ...@@ -261,59 +310,70 @@ if(NOT PY_VERSION)
endif() endif()
set(PYBIND11_PYTHON_VERSION ${PY_VERSION}) set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
# the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF # the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF
if(SANITIZER_TYPE AND NOT "${SANITIZER_TYPE}" MATCHES "^(Address|Leak|Memory|Thread|Undefined)$") if(SANITIZER_TYPE AND NOT "${SANITIZER_TYPE}" MATCHES
"^(Address|Leak|Memory|Thread|Undefined)$")
message("Choose the correct type of sanitizer") message("Choose the correct type of sanitizer")
return() return()
endif() endif()
if (LINUX AND NOT WITH_CUSTOM_DEVICE AND NOT ON_INFER) if(LINUX
set(WITH_CUSTOM_DEVICE ON) AND NOT WITH_CUSTOM_DEVICE
AND NOT ON_INFER)
set(WITH_CUSTOM_DEVICE ON)
endif() endif()
if(WIN32) if(WIN32)
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
MESSAGE(WARNING message(
"Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF.") WARNING
set(WITH_DISTRIBUTE OFF CACHE STRING "Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF."
"Disable DISTRIBUTE when compiling for Windows" FORCE) )
set(WITH_DISTRIBUTE
OFF
CACHE STRING "Disable DISTRIBUTE when compiling for Windows" FORCE)
endif() endif()
if(WITH_NCCL) if(WITH_NCCL)
MESSAGE(WARNING message(
"Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF.") WARNING "Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF.")
set(WITH_NCCL OFF CACHE STRING set(WITH_NCCL
"Disable NCCL when compiling for Windows" FORCE) OFF
CACHE STRING "Disable NCCL when compiling for Windows" FORCE)
endif() endif()
endif() endif()
if (NOT WITH_GPU AND WITH_NCCL) if(NOT WITH_GPU AND WITH_NCCL)
MESSAGE(WARNING message(
"Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF.") WARNING "Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF.")
set(WITH_NCCL OFF CACHE STRING set(WITH_NCCL
"Disable NCCL when compiling without GPU" FORCE) OFF
CACHE STRING "Disable NCCL when compiling without GPU" FORCE)
endif() endif()
# force WITH_XPU on when WITH_XPU_KP # force WITH_XPU on when WITH_XPU_KP
if (WITH_XPU_KP AND NOT WITH_XPU) if(WITH_XPU_KP AND NOT WITH_XPU)
MESSAGE(WARNING message(
WARNING
"Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON.") "Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON.")
set(WITH_XPU ON CACHE STRING set(WITH_XPU
"Enable WITH_XPU when compiling with WITH_XPU_KP" FORCE) ON
CACHE STRING "Enable WITH_XPU when compiling with WITH_XPU_KP" FORCE)
endif() endif()
if (NOT WITH_XPU AND WITH_XPU_BKCL) if(NOT WITH_XPU AND WITH_XPU_BKCL)
MESSAGE(WARNING message(
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF.") WARNING "Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF.")
set(WITH_XPU_BKCL OFF CACHE STRING set(WITH_XPU_BKCL
"Disable BKCL when compiling without XPU" FORCE) OFF
CACHE STRING "Disable BKCL when compiling without XPU" FORCE)
endif() endif()
if (NOT WITH_MLU AND WITH_CNCL) if(NOT WITH_MLU AND WITH_CNCL)
MESSAGE(WARNING message(
"Disable CNCL when compiling without MLU. Force WITH_MLU=OFF.") WARNING "Disable CNCL when compiling without MLU. Force WITH_MLU=OFF.")
set(WITH_MLU OFF CACHE STRING set(WITH_MLU
"Disable CNCL when compiling without MLU" FORCE) OFF
CACHE STRING "Disable CNCL when compiling without MLU" FORCE)
endif() endif()
if(WITH_NCCL) if(WITH_NCCL)
...@@ -321,7 +381,10 @@ if(WITH_NCCL) ...@@ -321,7 +381,10 @@ if(WITH_NCCL)
include(nccl) include(nccl)
else() else()
if(WITH_GPU) if(WITH_GPU)
MESSAGE(WARNING "If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used.") message(
WARNING
"If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used."
)
endif() endif()
endif() endif()
...@@ -332,7 +395,6 @@ if(WITH_BRPC_RDMA) ...@@ -332,7 +395,6 @@ if(WITH_BRPC_RDMA)
endif() endif()
endif() endif()
if(WITH_GPU) if(WITH_GPU)
include(cuda) include(cuda)
# lite subgraph compilation depends on CUDNN_ROOT, # lite subgraph compilation depends on CUDNN_ROOT,
...@@ -358,11 +420,12 @@ if(WITH_XPU_KP) ...@@ -358,11 +420,12 @@ if(WITH_XPU_KP)
include(xpu_kp) include(xpu_kp)
endif() endif()
if (NOT WITH_ROCM AND WITH_RCCL) if(NOT WITH_ROCM AND WITH_RCCL)
MESSAGE(WARNING message(
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.") WARNING "Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.")
set(WITH_RCCL OFF CACHE STRING set(WITH_RCCL
"Disable RCCL when compiling without ROCM" FORCE) OFF
CACHE STRING "Disable RCCL when compiling without ROCM" FORCE)
endif() endif()
if(WITH_RCCL) if(WITH_RCCL)
...@@ -370,7 +433,10 @@ if(WITH_RCCL) ...@@ -370,7 +433,10 @@ if(WITH_RCCL)
include(rccl) include(rccl)
else() else()
if(WITH_ROCM) if(WITH_ROCM)
MESSAGE(WARNING "If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used.") message(
WARNING
"If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used."
)
endif() endif()
endif() endif()
...@@ -380,16 +446,23 @@ endif() ...@@ -380,16 +446,23 @@ endif()
if(WITH_DISTRIBUTE) if(WITH_DISTRIBUTE)
if(LINUX) if(LINUX)
set(WITH_GLOO ON CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE) set(WITH_GLOO
ON
CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
endif() endif()
if(WITH_ASCEND_CL) if(WITH_ASCEND_CL)
# disable WITH_PSCORE for NPU before include third_party # disable WITH_PSCORE for NPU before include third_party
MESSAGE(WARNING "Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF.") message(
set(WITH_PSCORE OFF CACHE BOOL "Disable WITH_PSCORE when compiling with NPU" FORCE) WARNING
"Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF.")
set(WITH_PSCORE
OFF
CACHE BOOL "Disable WITH_PSCORE when compiling with NPU" FORCE)
endif() endif()
endif() endif()
include(third_party) # download, build, install third_party, Contains about 20+ dependencies include(third_party
)# download, build, install third_party, Contains about 20+ dependencies
include(flags) # set paddle compile flags include(flags) # set paddle compile flags
...@@ -407,38 +480,52 @@ include(configure) # add paddle env configuration ...@@ -407,38 +480,52 @@ include(configure) # add paddle env configuration
include_directories("${PADDLE_SOURCE_DIR}") include_directories("${PADDLE_SOURCE_DIR}")
if(WITH_NV_JETSON) if(WITH_NV_JETSON)
set(WITH_ARM ON CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE) set(WITH_ARM
ON
CACHE STRING "Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON." FORCE)
endif() endif()
if(WITH_ARM) if(WITH_ARM)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
set(WITH_XBYAK OFF CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON." FORCE) set(WITH_XBYAK
set(WITH_MKL OFF CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE) OFF
set(WITH_AVX OFF CACHE STRING "Disable AVX when compiling WITH_AVX=OFF." FORCE) CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON." FORCE)
set(WITH_MKL
OFF
CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE)
set(WITH_AVX
OFF
CACHE STRING "Disable AVX when compiling WITH_AVX=OFF." FORCE)
add_definitions(-DPADDLE_WITH_ARM) add_definitions(-DPADDLE_WITH_ARM)
endif() endif()
if (WITH_SW) if(WITH_SW)
# mieee flag solves floating-point exceptions under sw and ALPHA architectures # mieee flag solves floating-point exceptions under sw and ALPHA architectures
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -mieee") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -mieee")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -mieee") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -mieee")
set(WITH_XBYAK OFF CACHE STRING "Disable XBYAK when compiling WITH_SW=ON" FORCE) set(WITH_XBYAK
set(WITH_MKL OFF CACHE STRING "Disable MKL when compiling WITH_SW=ON." FORCE) OFF
CACHE STRING "Disable XBYAK when compiling WITH_SW=ON" FORCE)
set(WITH_MKL
OFF
CACHE STRING "Disable MKL when compiling WITH_SW=ON." FORCE)
add_definitions(-DPADDLE_WITH_SW) add_definitions(-DPADDLE_WITH_SW)
endif() endif()
if (WITH_MIPS) if(WITH_MIPS)
set(WITH_XBYAK OFF CACHE STRING "Disable XBYAK when compiling WITH_MIPS=ON" FORCE) set(WITH_XBYAK
OFF
CACHE STRING "Disable XBYAK when compiling WITH_MIPS=ON" FORCE)
add_definitions(-DPADDLE_WITH_MIPS) add_definitions(-DPADDLE_WITH_MIPS)
endif() endif()
if (WITH_ONEMKL) if(WITH_ONEMKL)
add_definitions(-DPADDLE_WITH_ONEMKL) add_definitions(-DPADDLE_WITH_ONEMKL)
endif() endif()
if (WITH_HETERPS) if(WITH_HETERPS)
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0) if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new")
endif() endif()
endif() endif()
...@@ -451,18 +538,25 @@ add_definitions(-DPADDLE_DLL_EXPORT) ...@@ -451,18 +538,25 @@ add_definitions(-DPADDLE_DLL_EXPORT)
if(ON_INFER) if(ON_INFER)
# you can trun off the paddle fluid and inference lib by set ON_INFER=OFF # you can trun off the paddle fluid and inference lib by set ON_INFER=OFF
message(STATUS "On inference mode, will take place some specific optimization.") message(
STATUS "On inference mode, will take place some specific optimization.")
include(inference_lib) include(inference_lib)
add_definitions(-DPADDLE_ON_INFERENCE) add_definitions(-DPADDLE_ON_INFERENCE)
else() else()
#TODO(luotao), combine this warning with `make inference_lib_dist` command. #TODO(luotao), combine this warning with `make inference_lib_dist` command.
message(WARNING "On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only.") message(
WARNING
"On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only."
)
endif() endif()
if(WITH_STRIP) if(WITH_STRIP)
find_program(STRIP_PATH strip) find_program(STRIP_PATH strip)
if(NOT STRIP_PATH OR NOT LINUX) if(NOT STRIP_PATH OR NOT LINUX)
set(WITH_STRIP OFF CACHE STRING "Command strip is only used on Linux when it exists." FORCE) set(WITH_STRIP
OFF
CACHE STRING "Command strip is only used on Linux when it exists."
FORCE)
endif() endif()
endif() endif()
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options
# Each experiment lives in its own module and is pulled in from here; the only
# one currently included is the lazy CUDA module loading experiment.
include(experiments/cuda_module_loading_lazy)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options for lazy cuda module loading
# cuda module lazy loading is supported natively by CUDA 11.7+
# this experimental option lets Paddle support lazy loading with CUDA versions before 11.7.
# Experimental: emulate lazy CUDA module loading on toolkits older than 11.7 by
# generating an nvcc wrapper script (tools/nvcc_lazy) and pointing the CUDA
# compiler variables at it. Only active on Linux inference builds with CUDA.
if(LINUX)
  # Conditions name the variables instead of expanding them with ${...}: an
  # undefined variable expands to nothing, which either produces a malformed
  # condition or (for `if(NOT )`) silently evaluates to false and skips the guard.
  if(NOT ON_INFER OR NOT LINUX)
    message(
      "EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms"
    )
    return()
  endif()
  if(NOT CUDA_FOUND)
    message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
    return()
  endif()
  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.7")
    message("cuda 11.7+ already support lazy module loading")
    return()
  endif()

  message(
    "for cuda before 11.7, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a"
  )
  set(CUDA_USE_STATIC_CUDA_RUNTIME
      OFF
      CACHE BOOL "" FORCE)
  # NOTE(review): this overwrites any CMAKE_CUDA_FLAGS set earlier rather than
  # appending to them -- confirm that is intended.
  set(CMAKE_CUDA_FLAGS "--cudart shared")
  enable_language(CUDA)

  # Regenerate the wrapper from scratch. The steps are issued one at a time
  # because a single execute_process() with several COMMANDs runs them
  # concurrently as a pipeline, which would let the removal race with the
  # generator writing the same file.
  file(REMOVE_RECURSE "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")
  execute_process(COMMAND "chmod" "755"
                          "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh")
  execute_process(
    COMMAND "bash" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh"
            "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" "${CUDA_TOOLKIT_ROOT_DIR}"
    RESULT_VARIABLE nvcc_lazy_generate_result)
  # Fail at configure time instead of later with an unusable CUDA compiler.
  if(NOT nvcc_lazy_generate_result EQUAL 0)
    message(
      FATAL_ERROR
        "Failed to generate ${CMAKE_SOURCE_DIR}/tools/nvcc_lazy (exit status: ${nvcc_lazy_generate_result})"
    )
  endif()
  unset(nvcc_lazy_generate_result)
  execute_process(COMMAND "chmod" "755" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")

  # Route both the legacy FindCUDA path and the native CUDA language support
  # through the generated wrapper.
  set(CUDA_NVCC_EXECUTABLE
      "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
      CACHE FILEPATH "" FORCE)
  set(CMAKE_CUDA_COMPILER
      "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
      CACHE FILEPATH "" FORCE)
endif()
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generates the nvcc wrapper script used for the lazy module loading trick.
#
# Usage: nvcc_lazy.sh <output-wrapper-path> <cuda-toolkit-root>
#   $1  path of the wrapper script to write (truncated if it already exists)
#   $2  CUDA toolkit root; <root>/bin/nvcc and <root>/nvvm/bin are baked into
#       the generated wrapper
if [ "$#" -lt 2 ]; then
  echo "usage: $0 <output-wrapper-path> <cuda-toolkit-root>" >&2
  exit 1
fi

wrapper_path="$1"
cuda_root="$2"

# Everything is emitted through one quoted redirection: the output path may
# contain spaces, and truncating (instead of appending) keeps reruns from
# duplicating the wrapper's content. Escaped \$ / \" sequences are written
# literally into the wrapper; ${cuda_root} is expanded now, at generation time.
{
  echo "#!/usr/bin/env bash"
  echo "unset GREP_OPTIONS"
  echo "set -e"
  echo
  echo "# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved."
  echo "#"
  echo "# Licensed under the Apache License, Version 2.0 (the \"License\");"
  echo "# you may not use this file except in compliance with the License."
  echo "# You may obtain a copy of the License at"
  echo "#"
  echo "# http://www.apache.org/licenses/LICENSE-2.0"
  echo "#"
  echo "# Unless required by applicable law or agreed to in writing, software"
  echo "# distributed under the License is distributed on an \"AS IS\" BASIS,"
  echo "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied."
  echo "# See the License for the specific language governing permissions and"
  echo "# limitations under the License."
  echo
  echo
  echo "## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY"
  echo
  echo "# set cicc PATH for Centos"
  echo "export PATH=\$PATH:${cuda_root}/nvvm/bin"
  echo
  echo "# check nvcc version, if nvcc >= 11.7, just run nvcc itself"
  # Probe and run the same toolkit nvcc that the dry-run step below uses,
  # rather than whichever nvcc happens to be first on PATH.
  echo "CUDA_VERSION=\$(${cuda_root}/bin/nvcc --version | grep -oP '(?<=V)\d*\.\d*')"
  echo "CUDA_VERSION_MAJOR=\${CUDA_VERSION%.*}"
  echo "CUDA_VERSION_MINOR=\${CUDA_VERSION#*.}"
  echo "if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 7) )); then"
  echo "  ${cuda_root}/bin/nvcc \"\$@\""
  echo "  exit"
  echo "fi"
  echo
  echo "BUILDDIR=\$(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX)"
  echo "echo \"\$@\" > \${BUILDDIR}/args"
  echo "BUILDSH=\${BUILDDIR}/build.sh"
  echo "${cuda_root}/bin/nvcc --dryrun --keep --keep-dir=\${BUILDDIR} \"\$@\" 2>&1 | sed -e 's/#\\$ //;/^rm/d' > \$BUILDSH"
  echo "sed -i -e '/^\s*--/d' \$BUILDSH"
  echo "sed -ne '1,/^cicc.*cudafe1.stub.c/p' \${BUILDSH} > \${BUILDSH}.pre"
  echo "sed -e '1,/^cicc.*cudafe1.stub.c/d' \${BUILDSH} > \${BUILDSH}.post"
  echo
  echo "sed -i -e '/LIBRARIES=/{s/\s//g;s/\"\"/ /g}' \${BUILDSH}.pre"
  echo
  echo "/usr/bin/env bash \${BUILDSH}.pre"
  # The -name pattern is quoted in the wrapper so the shell cannot glob-expand
  # it against files in the current directory before find sees it.
  echo "STUBF=\$(find \$BUILDDIR -name '*.cudafe1.stub.c')"
  echo "CUFILE=\$(basename -s '.cudafe1.stub.c' \$STUBF)"
  echo "sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);' \$STUBF"
  echo "sed -i -e 's/__sti____cudaRegisterAll\(.*{\)/__do____cudaRegisterAll\1/' \$STUBF"
  # The next emitted line is a commented-out debug variant (it logs each lazy
  # load to stderr); it is written into the wrapper as a comment only.
  echo "# sed -i -e \"/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\\\"CUDA_MODULE_LOADING\\\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; fprintf(stderr,\\\"===> \${CUFILE} lazy-load? %d\\\\\\\\n\\\", l); __do____cudaRegisterAll();}\" \$STUBF"
  echo "sed -i -e \"/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\\\"CUDA_MODULE_LOADING\\\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}\" \$STUBF"
  echo "sed -i -e '/__try____cudaRegisterAll\(.*{\)/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}' \$STUBF"
  echo "sed -i -e 's/{\(__device_stub__\)/{__try____cudaRegisterAll(1);\1/' \$STUBF"
  echo "/usr/bin/env bash \${BUILDSH}.post"
  echo "rm -rf \$BUILDDIR"
} > "${wrapper_path}"
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册