Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
210fa777
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
210fa777
编写于
8月 08, 2022
作者:
X
xiaoxiaohehe001
提交者:
GitHub
8月 08, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
nvcclazylinux (#44957)
上级
30b66f03
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
537 addition
and
301 deletion
+537
-301
CMakeLists.txt
CMakeLists.txt
+395
-301
cmake/experimental.cmake
cmake/experimental.cmake
+17
-0
cmake/experiments/cuda_module_loading_lazy.cmake
cmake/experiments/cuda_module_loading_lazy.cmake
+55
-0
tools/nvcc_lazy.sh
tools/nvcc_lazy.sh
+70
-0
未找到文件。
CMakeLists.txt
浏览文件 @
210fa777
...
...
@@ -28,7 +28,10 @@ include(system)
# Note(zhouwei): Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if
(
NOT CMAKE_BUILD_TYPE
)
set
(
CMAKE_BUILD_TYPE
"Release"
CACHE STRING
set
(
CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE
)
endif
()
...
...
@@ -57,20 +60,21 @@ option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME"
# Note(zhouwei): this uses the options declared above, so it must be placed here
include
(
init
)
include
(
generic
)
# simplify cmake module
include
(
experimental
)
# experimental build options
if
(
WITH_GPU
AND WITH_XPU
)
if
(
WITH_GPU
AND WITH_XPU
)
message
(
FATAL_ERROR
"Error when compile GPU and XPU at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_XPU_KP
)
if
(
WITH_GPU AND WITH_XPU_KP
)
message
(
FATAL_ERROR
"Error when compile GPU and XPU2 at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_ASCEND
)
if
(
WITH_GPU AND WITH_ASCEND
)
message
(
FATAL_ERROR
"Error when compile GPU and ASCEND at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_ROCM
)
if
(
WITH_GPU AND WITH_ROCM
)
message
(
FATAL_ERROR
"Error when compile CUDA and ROCM at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_MLU
)
if
(
WITH_GPU AND WITH_MLU
)
message
(
FATAL_ERROR
"Error when compile GPU and MLU at the same time"
)
endif
()
...
...
@@ -88,7 +92,9 @@ message(STATUS "AR tools: ${CMAKE_AR}")
# MUSL build turn off warnings
if
(
WITH_MUSL
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
)
endif
()
if
(
APPLE AND WITH_ARM
)
...
...
@@ -118,13 +124,23 @@ if(WIN32)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
/Zc:inline"
)
endif
()
if
(
MSVC_STATIC_CRT
)
message
(
STATUS
"Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019"
)
foreach
(
flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
)
if
(
MSVC_STATIC_CRT
)
message
(
STATUS
"Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019"
)
foreach
(
flag_var
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO
)
if
(
${
flag_var
}
MATCHES
"/MD"
)
string
(
REGEX REPLACE
"/MD"
"/MT"
${
flag_var
}
"
${${
flag_var
}}
"
)
endif
()
...
...
@@ -134,11 +150,18 @@ if(WIN32)
# NOTE(zhouwei): msvc max/min macro conflict with std::min/max, define NOMINMAX globally
add_definitions
(
"-DNOMINMAX"
)
# windows build turn off warnings, use parallel compiling.
foreach
(
flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
)
foreach
(
flag_var
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO
)
string
(
REGEX REPLACE
"/W[1-4]"
" /W0 "
${
flag_var
}
"
${${
flag_var
}}
"
)
# NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling,
...
...
@@ -162,21 +185,28 @@ if(WIN32)
endif
()
endforeach
(
flag_var
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221"
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221"
)
if
(
MSVC_STATIC_CRT
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/NODEFAULTLIB:MSVCRT.LIB"
)
endif
()
endforeach
(
flag_var
)
if
(
WITH_WIN_DUMP_DBG
)
if
(
WITH_WIN_DUMP_DBG
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/Zi"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/Zi"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/DEBUG /OPT:REF /OPT:ICF"
)
endforeach
(
flag_var
)
...
...
@@ -184,7 +214,9 @@ if(WIN32)
endif
()
else
(
WIN32
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations"
)
endif
(
WIN32
)
find_package
(
Git REQUIRED
)
...
...
@@ -207,13 +239,16 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO
option
(
WITH_SYSTEM_BLAS
"Use system blas library"
OFF
)
option
(
WITH_DISTRIBUTE
"Compile with distributed support"
OFF
)
option
(
WITH_BRPC_RDMA
"Use brpc rdma as the rpc protocal"
OFF
)
option
(
ON_INFER
"Turn on inference optimization and inference-lib generation"
OFF
)
option
(
ON_INFER
"Turn on inference optimization and inference-lib generation"
OFF
)
################################ Internal Configurations #######################################
option
(
WITH_NV_JETSON
"Compile PaddlePaddle with NV JETSON"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler and gperftools"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler and gperftools"
OFF
)
option
(
WITH_COVERAGE
"Compile PaddlePaddle with code coverage"
OFF
)
option
(
WITH_INCREMENTAL_COVERAGE
"Generate coverage reports only for incremental code"
OFF
)
OPTION
(
WITH_LIBXSMM
"Compile with libxsmm"
OFF
)
option
(
WITH_INCREMENTAL_COVERAGE
"Generate coverage reports only for incremental code"
OFF
)
option
(
WITH_LIBXSMM
"Compile with libxsmm"
OFF
)
option
(
COVERALLS_UPLOAD
"Package code coverage data to coveralls"
OFF
)
option
(
WITH_PSLIB
"Compile with pslib support"
OFF
)
option
(
WITH_BOX_PS
"Compile with box_ps support"
OFF
)
...
...
@@ -221,10 +256,14 @@ option(WITH_XBYAK "Compile with xbyak support" ON)
option
(
WITH_CONTRIB
"Compile the third-party contributation"
OFF
)
option
(
WITH_PSCORE
"Compile with parameter server support"
${
WITH_DISTRIBUTE
}
)
# Build switch for heterps support; disabled by default.
# The default value was previously spelled `OFF}` (stray closing brace).
option(WITH_HETERPS "Compile with heterps" OFF)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference C++ high-level api interface"
OFF
)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference C++ high-level api interface"
OFF
)
option
(
PY_VERSION
"Compile PaddlePaddle with python3 support"
${
PY_VERSION
}
)
option
(
WITH_DGC
"Use DGC(Deep Gradient Compression) or not"
${
WITH_DISTRIBUTE
}
)
option
(
SANITIZER_TYPE
"Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined"
OFF
)
option
(
SANITIZER_TYPE
"Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined"
OFF
)
option
(
WITH_LITE
"Compile Paddle Fluid with Lite Engine"
OFF
)
option
(
WITH_CINN
"Compile PaddlePaddle with CINN"
OFF
)
option
(
WITH_INFRT
"Compile PaddlePaddle with INFRT"
OFF
)
...
...
@@ -239,19 +278,29 @@ option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
option
(
WITH_MUSL
"Compile with musl libc instead of gblic"
OFF
)
option
(
WITH_UNITY_BUILD
"Compile with UnityBuild mode"
OFF
)
option
(
WITH_STRIP
"Strip so files of Whl packages"
OFF
)
option
(
NEW_RELEASE_PYPI
"PaddlePaddle next-level release strategy for pypi cubin package"
OFF
)
option
(
NEW_RELEASE_ALL
"PaddlePaddle next-level release strategy for all arches cubin package"
OFF
)
option
(
NEW_RELEASE_JIT
"PaddlePaddle next-level release strategy for backup jit package"
OFF
)
option
(
NEW_RELEASE_PYPI
"PaddlePaddle next-level release strategy for pypi cubin package"
OFF
)
option
(
NEW_RELEASE_ALL
"PaddlePaddle next-level release strategy for all arches cubin package"
OFF
)
option
(
NEW_RELEASE_JIT
"PaddlePaddle next-level release strategy for backup jit package"
OFF
)
option
(
WITH_ASCEND_INT64
"Compile with int64 kernel for ascend NPU"
OFF
)
option
(
WITH_POCKETFFT
"Compile with pocketfft support"
ON
)
option
(
WITH_RECORD_BUILDTIME
"Compile PaddlePaddle with record all targets build time"
OFF
)
option
(
WITH_RECORD_BUILDTIME
"Compile PaddlePaddle with record all targets build time"
OFF
)
option
(
WITH_CUSTOM_DEVICE
"Compile with custom device support"
OFF
)
if
(
WITH_RECORD_BUILDTIME
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_COMPILE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_LINK
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_COMPILE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_LINK
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
else
()
include
(
ccache
)
# set ccache for compilation ; if WITH_RECORD_BUILDTIME=ON can't use ccache
include
(
ccache
)
# set ccache for compilation ; if WITH_RECORD_BUILDTIME=ON can't use ccache
endif
()
unset
(
WITH_RECORD_BUILDTIME CACHE
)
...
...
@@ -261,59 +310,70 @@ if(NOT PY_VERSION)
endif
()
set
(
PYBIND11_PYTHON_VERSION
${
PY_VERSION
}
)
# the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF
if
(
SANITIZER_TYPE AND NOT
"
${
SANITIZER_TYPE
}
"
MATCHES
"^(Address|Leak|Memory|Thread|Undefined)$"
)
if
(
SANITIZER_TYPE AND NOT
"
${
SANITIZER_TYPE
}
"
MATCHES
"^(Address|Leak|Memory|Thread|Undefined)$"
)
message
(
"Choose the correct type of sanitizer"
)
return
()
endif
()
if
(
LINUX AND NOT WITH_CUSTOM_DEVICE AND NOT ON_INFER
)
set
(
WITH_CUSTOM_DEVICE ON
)
if
(
LINUX
AND NOT WITH_CUSTOM_DEVICE
AND NOT ON_INFER
)
set
(
WITH_CUSTOM_DEVICE ON
)
endif
()
if
(
WIN32
)
if
(
WITH_DISTRIBUTE
)
MESSAGE
(
WARNING
"Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF."
)
set
(
WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
message
(
WARNING
"Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF."
)
set
(
WITH_DISTRIBUTE
OFF
CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
endif
()
if
(
WITH_NCCL
)
MESSAGE
(
WARNING
"Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL OFF CACHE STRING
"Disable NCCL when compiling for Windows"
FORCE
)
message
(
WARNING
"Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL
OFF
CACHE STRING
"Disable NCCL when compiling for Windows"
FORCE
)
endif
()
endif
()
if
(
NOT WITH_GPU AND WITH_NCCL
)
MESSAGE
(
WARNING
"Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL OFF CACHE STRING
"Disable NCCL when compiling without GPU"
FORCE
)
if
(
NOT WITH_GPU AND WITH_NCCL
)
message
(
WARNING
"Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL
OFF
CACHE STRING
"Disable NCCL when compiling without GPU"
FORCE
)
endif
()
# force WITH_XPU on when WITH_XPU_KP
if
(
WITH_XPU_KP AND NOT WITH_XPU
)
MESSAGE
(
WARNING
if
(
WITH_XPU_KP AND NOT WITH_XPU
)
message
(
WARNING
"Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON."
)
set
(
WITH_XPU ON CACHE STRING
"Enable WITH_XPU when compiling with WITH_XPU_KP"
FORCE
)
set
(
WITH_XPU
ON
CACHE STRING
"Enable WITH_XPU when compiling with WITH_XPU_KP"
FORCE
)
endif
()
if
(
NOT WITH_XPU AND WITH_XPU_BKCL
)
MESSAGE
(
WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF."
)
set
(
WITH_XPU_BKCL OFF CACHE STRING
"Disable BKCL when compiling without XPU"
FORCE
)
if
(
NOT WITH_XPU AND WITH_XPU_BKCL
)
message
(
WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF."
)
set
(
WITH_XPU_BKCL
OFF
CACHE STRING
"Disable BKCL when compiling without XPU"
FORCE
)
endif
()
# CNCL was requested but MLU support is off: warn and force WITH_CNCL off in
# the cache, mirroring the NCCL/BKCL/RCCL guards in this file. The earlier
# version forced WITH_MLU (which is already off whenever this branch runs),
# leaving WITH_CNCL enabled.
if(NOT WITH_MLU AND WITH_CNCL)
  message(
    WARNING "Disable CNCL when compiling without MLU. Force WITH_CNCL=OFF.")
  set(WITH_CNCL
      OFF
      CACHE STRING "Disable CNCL when compiling without MLU" FORCE)
endif()
if
(
WITH_NCCL
)
...
...
@@ -321,7 +381,10 @@ if(WITH_NCCL)
include
(
nccl
)
else
()
if
(
WITH_GPU
)
MESSAGE
(
WARNING
"If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used."
)
message
(
WARNING
"If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used."
)
endif
()
endif
()
...
...
@@ -332,7 +395,6 @@ if(WITH_BRPC_RDMA)
endif
()
endif
()
if
(
WITH_GPU
)
include
(
cuda
)
# lite subgraph compilation depends on CUDNN_ROOT,
...
...
@@ -358,11 +420,12 @@ if(WITH_XPU_KP)
include
(
xpu_kp
)
endif
()
if
(
NOT WITH_ROCM AND WITH_RCCL
)
MESSAGE
(
WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF."
)
set
(
WITH_RCCL OFF CACHE STRING
"Disable RCCL when compiling without ROCM"
FORCE
)
if
(
NOT WITH_ROCM AND WITH_RCCL
)
message
(
WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF."
)
set
(
WITH_RCCL
OFF
CACHE STRING
"Disable RCCL when compiling without ROCM"
FORCE
)
endif
()
if
(
WITH_RCCL
)
...
...
@@ -370,7 +433,10 @@ if(WITH_RCCL)
include
(
rccl
)
else
()
if
(
WITH_ROCM
)
MESSAGE
(
WARNING
"If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used."
)
message
(
WARNING
"If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used."
)
endif
()
endif
()
...
...
@@ -380,16 +446,23 @@ endif()
if
(
WITH_DISTRIBUTE
)
if
(
LINUX
)
set
(
WITH_GLOO ON CACHE STRING
"Enable GLOO when compiling WITH_DISTRIBUTE=ON."
FORCE
)
set
(
WITH_GLOO
ON
CACHE STRING
"Enable GLOO when compiling WITH_DISTRIBUTE=ON."
FORCE
)
endif
()
if
(
WITH_ASCEND_CL
)
# disable WITH_PSCORE for NPU before include third_party
MESSAGE
(
WARNING
"Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF."
)
set
(
WITH_PSCORE OFF CACHE BOOL
"Disable WITH_PSCORE when compiling with NPU"
FORCE
)
message
(
WARNING
"Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF."
)
set
(
WITH_PSCORE
OFF
CACHE BOOL
"Disable WITH_PSCORE when compiling with NPU"
FORCE
)
endif
()
endif
()
include
(
third_party
)
# download, build, install third_party, Contains about 20+ dependencies
include
(
third_party
)
# download, build, install third_party, Contains about 20+ dependencies
include
(
flags
)
# set paddle compile flags
...
...
@@ -407,38 +480,52 @@ include(configure) # add paddle env configuration
include_directories
(
"
${
PADDLE_SOURCE_DIR
}
"
)
if
(
WITH_NV_JETSON
)
set
(
WITH_ARM ON CACHE STRING
"Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON."
FORCE
)
set
(
WITH_ARM
ON
CACHE STRING
"Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON."
FORCE
)
endif
()
if
(
WITH_ARM
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
-fPIC"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-fPIC"
)
set
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK when compiling WITH_ARM=ON."
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when compiling WITH_ARM=ON."
FORCE
)
set
(
WITH_AVX OFF CACHE STRING
"Disable AVX when compiling WITH_AVX=OFF."
FORCE
)
set
(
WITH_XBYAK
OFF
CACHE STRING
"Disable XBYAK when compiling WITH_ARM=ON."
FORCE
)
set
(
WITH_MKL
OFF
CACHE STRING
"Disable MKL when compiling WITH_ARM=ON."
FORCE
)
set
(
WITH_AVX
OFF
CACHE STRING
"Disable AVX when compiling WITH_AVX=OFF."
FORCE
)
add_definitions
(
-DPADDLE_WITH_ARM
)
endif
()
if
(
WITH_SW
)
if
(
WITH_SW
)
# mieee flag solves floating-point exceptions under sw and ALPHA architectures
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
-fPIC -mieee"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-fPIC -mieee"
)
set
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK when compiling WITH_SW=ON"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when compiling WITH_SW=ON."
FORCE
)
set
(
WITH_XBYAK
OFF
CACHE STRING
"Disable XBYAK when compiling WITH_SW=ON"
FORCE
)
set
(
WITH_MKL
OFF
CACHE STRING
"Disable MKL when compiling WITH_SW=ON."
FORCE
)
add_definitions
(
-DPADDLE_WITH_SW
)
endif
()
if
(
WITH_MIPS
)
set
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK when compiling WITH_MIPS=ON"
FORCE
)
if
(
WITH_MIPS
)
set
(
WITH_XBYAK
OFF
CACHE STRING
"Disable XBYAK when compiling WITH_MIPS=ON"
FORCE
)
add_definitions
(
-DPADDLE_WITH_MIPS
)
endif
()
if
(
WITH_ONEMKL
)
if
(
WITH_ONEMKL
)
add_definitions
(
-DPADDLE_WITH_ONEMKL
)
endif
()
if
(
WITH_HETERPS
)
if
(
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0
)
if
(
WITH_HETERPS
)
if
(
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-faligned-new"
)
endif
()
endif
()
...
...
@@ -451,18 +538,25 @@ add_definitions(-DPADDLE_DLL_EXPORT)
if
(
ON_INFER
)
# you can turn off the paddle fluid and inference lib by setting ON_INFER=OFF
message
(
STATUS
"On inference mode, will take place some specific optimization."
)
message
(
STATUS
"On inference mode, will take place some specific optimization."
)
include
(
inference_lib
)
add_definitions
(
-DPADDLE_ON_INFERENCE
)
else
()
#TODO(luotao), combine this warning with `make inference_lib_dist` command.
message
(
WARNING
"On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only."
)
message
(
WARNING
"On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only."
)
endif
()
if
(
WITH_STRIP
)
find_program
(
STRIP_PATH strip
)
if
(
NOT STRIP_PATH OR NOT LINUX
)
set
(
WITH_STRIP OFF CACHE STRING
"Command strip is only used on Linux when it exists."
FORCE
)
set
(
WITH_STRIP
OFF
CACHE STRING
"Command strip is only used on Linux when it exists."
FORCE
)
endif
()
endif
()
...
...
cmake/experimental.cmake
0 → 100644
浏览文件 @
210fa777
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options
include
(
experiments/cuda_module_loading_lazy
)
cmake/experiments/cuda_module_loading_lazy.cmake
0 → 100644
浏览文件 @
210fa777
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options for lazy cuda module loading
# cuda module lazy loading is supported by CUDA 11.7+
# this experimental option makes Paddle support lazy loading before CUDA 11.7.
# Experimental lazy CUDA module loading for Linux builds with CUDA < 11.7.
#
# Runs tools/nvcc_lazy.sh to generate the wrapper script tools/nvcc_lazy and
# then forces both CUDA_NVCC_EXECUTABLE and CMAKE_CUDA_COMPILER to that
# wrapper. Processing of this file stops early (return()) when ON_INFER is
# off, CUDA was not found, or the CUDA version is already 11.7 or newer.
if(LINUX)
  # Conditions name the variables instead of expanding them with ${...}, so an
  # unset variable evaluates as false rather than yielding a malformed if().
  if(NOT ON_INFER OR NOT LINUX)
    message(
      "EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms"
    )
    return()
  endif()

  if(NOT CUDA_FOUND)
    message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
    return()
  endif()

  if(CUDA_VERSION VERSION_GREATER_EQUAL "11.7")
    message("cuda 11.7+ already support lazy module loading")
    return()
  endif()

  message(
    "for cuda before 11.7, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a"
  )
  set(CUDA_USE_STATIC_CUDA_RUNTIME
      OFF
      CACHE BOOL "" FORCE)
  set(CMAKE_CUDA_FLAGS "--cudart shared")
  enable_language(CUDA)

  set(paddle_nvcc_lazy_wrapper "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")
  set(paddle_nvcc_lazy_generator "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh")

  # Each step runs as its own process, strictly in order. Multiple COMMANDs in
  # one execute_process() form a concurrent pipeline, which would let the
  # removal race with the generator writing the same file.
  # The generator appends to its output file, so drop any stale wrapper first.
  file(REMOVE_RECURSE "${paddle_nvcc_lazy_wrapper}")
  execute_process(COMMAND "chmod" "755" "${paddle_nvcc_lazy_generator}")
  execute_process(
    COMMAND "bash" "${paddle_nvcc_lazy_generator}"
            "${paddle_nvcc_lazy_wrapper}" "${CUDA_TOOLKIT_ROOT_DIR}"
    RESULT_VARIABLE paddle_nvcc_lazy_result)
  # The compiler is force-set to the wrapper below, so a failed generation
  # must stop configuration instead of surfacing later as a broken compiler.
  if(NOT paddle_nvcc_lazy_result EQUAL 0)
    message(
      FATAL_ERROR
        "Failed to generate ${paddle_nvcc_lazy_wrapper} (result: ${paddle_nvcc_lazy_result})"
    )
  endif()
  execute_process(COMMAND "chmod" "755" "${paddle_nvcc_lazy_wrapper}")

  set(CUDA_NVCC_EXECUTABLE
      "${paddle_nvcc_lazy_wrapper}"
      CACHE FILEPATH "" FORCE)
  set(CMAKE_CUDA_COMPILER
      "${paddle_nvcc_lazy_wrapper}"
      CACHE FILEPATH "" FORCE)
endif()
tools/nvcc_lazy.sh
0 → 100644
浏览文件 @
210fa777
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
echo
"#!/usr/bin/env bash"
>>
$1
echo
"unset GREP_OPTIONS"
>>
$1
echo
"set -e"
>>
$1
echo
-e
>>
$1
echo
"# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved."
>>
$1
echo
"#"
>>
$1
echo
"# Licensed under the Apache License, Version 2.0 (the
\"
License
\"
);"
>>
$1
echo
"# you may not use this file except in compliance with the License."
>>
$1
echo
"# You may obtain a copy of the License at"
>>
$1
echo
"#"
>>
$1
echo
"# http://www.apache.org/licenses/LICENSE-2.0"
>>
$1
echo
"#"
>>
$1
echo
"# Unless required by applicable law or agreed to in writing, software"
>>
$1
echo
"# distributed under the License is distributed on an
\"
AS IS
\"
BASIS,"
>>
$1
echo
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied."
>>
$1
echo
"# See the License for the specific language governing permissions and"
>>
$1
echo
"# limitations under the License."
>>
$1
echo
-e
>>
$1
echo
-e
>>
$1
echo
"## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY"
>>
$1
echo
-e
>>
$1
echo
"# set cicc PATH for Centos"
>>
$1
echo
"export PATH=
\$
PATH:
$2
/nvvm/bin"
>>
$1
echo
-e
>>
$1
echo
"# check nvcc version, if nvcc >= 11.7, just run nvcc itself"
>>
$1
echo
"CUDA_VERSION=
\$
(nvcc --version | grep -oP '(?<=V)
\d
*
\.\d
*')"
>>
$1
echo
"CUDA_VERSION_MAJOR=
\$
{CUDA_VERSION%.*}"
>>
$1
echo
"CUDA_VERSION_MINOR=
\$
{CUDA_VERSION#*.}"
>>
$1
echo
"if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 7) )); then"
>>
$1
echo
" nvcc
\"\$
@
\"
"
>>
$1
echo
" exit"
>>
$1
echo
"fi"
>>
$1
echo
-e
>>
$1
echo
"BUILDDIR=
\$
(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX)"
>>
$1
echo
"echo
\"\$
@
\"
>
\$
{BUILDDIR}/args"
>>
$1
echo
"BUILDSH=
\$
{BUILDDIR}/build.sh"
>>
$1
echo
"
$2
/bin/nvcc --dryrun --keep --keep-dir=
\$
{BUILDDIR}
\"\$
@
\"
2>&1 | sed -e 's/#
\\
$
//;/^rm/d' >
\$
BUILDSH"
>>
$1
echo
"sed -i -e '/^
\s
*--/d'
\$
BUILDSH"
>>
$1
echo
"sed -ne '1,/^cicc.*cudafe1.stub.c/p'
\$
{BUILDSH} >
\$
{BUILDSH}.pre"
>>
$1
echo
"sed -e '1,/^cicc.*cudafe1.stub.c/d'
\$
{BUILDSH} >
\$
{BUILDSH}.post"
>>
$1
echo
-e
>>
$1
echo
"sed -i -e '/LIBRARIES=/{s/
\s
//g;s/
\"\"
/ /g}'
\$
{BUILDSH}.pre"
>>
$1
echo
-e
>>
$1
echo
"/usr/bin/env bash
\$
{BUILDSH}.pre"
>>
$1
echo
"STUBF=
\$
(find
\$
BUILDDIR -name *.cudafe1.stub.c)"
>>
$1
echo
"CUFILE=
\$
(basename -s '.cudafe1.stub.c'
\$
STUBF)"
>>
$1
echo
"sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);'
\$
STUBF"
>>
$1
echo
"sed -i -e 's/__sti____cudaRegisterAll
\(
.*{
\)
/__do____cudaRegisterAll
\1
/'
\$
STUBF"
>>
$1
echo
"# sed -i -e
\"
/__do____cudaRegisterAll
\(
.*{
\)
/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(
\\\"
CUDA_MODULE_LOADING
\\\"
); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; fprintf(stderr,
\\\"
===>
\$
{CUFILE} lazy-load? %d
\\\\\\\\
n
\\\"
, l); __do____cudaRegisterAll();}
\"
\$
STUBF"
>>
$1
echo
"sed -i -e
\"
/__do____cudaRegisterAll
\(
.*{
\)
/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(
\\\"
CUDA_MODULE_LOADING
\\\"
); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}
\"
\$
STUBF"
>>
$1
echo
"sed -i -e '/__try____cudaRegisterAll
\(
.*{
\)
/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}'
\$
STUBF"
>>
$1
echo
"sed -i -e 's/{
\(
__device_stub__
\)
/{__try____cudaRegisterAll(1);
\1
/'
\$
STUBF"
>>
$1
echo
"/usr/bin/env bash
\$
{BUILDSH}.post"
>>
$1
echo
"rm -rf
\$
BUILDDIR"
>>
$1
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录