Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
210fa777
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
210fa777
编写于
8月 08, 2022
作者:
X
xiaoxiaohehe001
提交者:
GitHub
8月 08, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
nvcclazylinux (#44957)
上级
30b66f03
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
537 addition
and
301 deletion
+537
-301
CMakeLists.txt
CMakeLists.txt
+395
-301
cmake/experimental.cmake
cmake/experimental.cmake
+17
-0
cmake/experiments/cuda_module_loading_lazy.cmake
cmake/experiments/cuda_module_loading_lazy.cmake
+55
-0
tools/nvcc_lazy.sh
tools/nvcc_lazy.sh
+70
-0
未找到文件。
CMakeLists.txt
浏览文件 @
210fa777
...
...
@@ -28,7 +28,10 @@ include(system)
# Note(zhouwei): Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if
(
NOT CMAKE_BUILD_TYPE
)
set
(
CMAKE_BUILD_TYPE
"Release"
CACHE STRING
set
(
CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE
)
endif
()
...
...
@@ -57,20 +60,21 @@ option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME"
# Note(zhouwei): It uses the options above, so it is included here
include
(
init
)
include
(
generic
)
# simplify cmake module
include
(
experimental
)
# experimental build options
if
(
WITH_GPU
AND WITH_XPU
)
if
(
WITH_GPU
AND WITH_XPU
)
message
(
FATAL_ERROR
"Error when compile GPU and XPU at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_XPU_KP
)
if
(
WITH_GPU AND WITH_XPU_KP
)
message
(
FATAL_ERROR
"Error when compile GPU and XPU2 at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_ASCEND
)
if
(
WITH_GPU AND WITH_ASCEND
)
message
(
FATAL_ERROR
"Error when compile GPU and ASCEND at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_ROCM
)
if
(
WITH_GPU AND WITH_ROCM
)
message
(
FATAL_ERROR
"Error when compile CUDA and ROCM at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_MLU
)
if
(
WITH_GPU AND WITH_MLU
)
message
(
FATAL_ERROR
"Error when compile GPU and MLU at the same time"
)
endif
()
...
...
@@ -88,7 +92,9 @@ message(STATUS "AR tools: ${CMAKE_AR}")
# MUSL build turn off warnings
if
(
WITH_MUSL
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
)
endif
()
if
(
APPLE AND WITH_ARM
)
...
...
@@ -118,13 +124,23 @@ if(WIN32)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
/Zc:inline"
)
endif
()
if
(
MSVC_STATIC_CRT
)
message
(
STATUS
"Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019"
)
foreach
(
flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
)
if
(
MSVC_STATIC_CRT
)
message
(
STATUS
"Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019"
)
foreach
(
flag_var
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO
)
if
(
${
flag_var
}
MATCHES
"/MD"
)
string
(
REGEX REPLACE
"/MD"
"/MT"
${
flag_var
}
"
${${
flag_var
}}
"
)
endif
()
...
...
@@ -134,11 +150,18 @@ if(WIN32)
# NOTE(zhouwei): msvc max/min macro conflict with std::min/max, define NOMINMAX globally
add_definitions
(
"-DNOMINMAX"
)
# windows build turn off warnings, use parallel compiling.
foreach
(
flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
)
foreach
(
flag_var
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO
)
string
(
REGEX REPLACE
"/W[1-4]"
" /W0 "
${
flag_var
}
"
${${
flag_var
}}
"
)
# NOTE(zhouwei25): GPU compile have too high memory utilization when parallel compiling,
...
...
@@ -162,21 +185,28 @@ if(WIN32)
endif
()
endforeach
(
flag_var
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221"
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221"
)
if
(
MSVC_STATIC_CRT
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/NODEFAULTLIB:MSVCRT.LIB"
)
endif
()
endforeach
(
flag_var
)
if
(
WITH_WIN_DUMP_DBG
)
if
(
WITH_WIN_DUMP_DBG
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/Zi"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/Zi"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/DEBUG /OPT:REF /OPT:ICF"
)
endforeach
(
flag_var
)
...
...
@@ -184,7 +214,9 @@ if(WIN32)
endif
()
else
(
WIN32
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations"
)
endif
(
WIN32
)
find_package
(
Git REQUIRED
)
...
...
@@ -207,13 +239,16 @@ option(WITH_MKL "Compile PaddlePaddle with MKL support." ${AVX_FO
option
(
WITH_SYSTEM_BLAS
"Use system blas library"
OFF
)
option
(
WITH_DISTRIBUTE
"Compile with distributed support"
OFF
)
option
(
WITH_BRPC_RDMA
"Use brpc rdma as the rpc protocal"
OFF
)
option
(
ON_INFER
"Turn on inference optimization and inference-lib generation"
OFF
)
option
(
ON_INFER
"Turn on inference optimization and inference-lib generation"
OFF
)
################################ Internal Configurations #######################################
option
(
WITH_NV_JETSON
"Compile PaddlePaddle with NV JETSON"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler and gperftools"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler and gperftools"
OFF
)
option
(
WITH_COVERAGE
"Compile PaddlePaddle with code coverage"
OFF
)
option
(
WITH_INCREMENTAL_COVERAGE
"Generate coverage reports only for incremental code"
OFF
)
OPTION
(
WITH_LIBXSMM
"Compile with libxsmm"
OFF
)
option
(
WITH_INCREMENTAL_COVERAGE
"Generate coverage reports only for incremental code"
OFF
)
option
(
WITH_LIBXSMM
"Compile with libxsmm"
OFF
)
option
(
COVERALLS_UPLOAD
"Package code coverage data to coveralls"
OFF
)
option
(
WITH_PSLIB
"Compile with pslib support"
OFF
)
option
(
WITH_BOX_PS
"Compile with box_ps support"
OFF
)
...
...
@@ -221,10 +256,14 @@ option(WITH_XBYAK "Compile with xbyak support" ON)
option
(
WITH_CONTRIB
"Compile the third-party contributation"
OFF
)
option
(
WITH_PSCORE
"Compile with parameter server support"
${
WITH_DISTRIBUTE
}
)
# Build switch for heterps support; disabled by default.
# The default used to be written as `OFF}` (stray closing brace), which is not
# one of CMake's recognized boolean constants.
option(WITH_HETERPS "Compile with heterps" OFF)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference C++ high-level api interface"
OFF
)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference C++ high-level api interface"
OFF
)
option
(
PY_VERSION
"Compile PaddlePaddle with python3 support"
${
PY_VERSION
}
)
option
(
WITH_DGC
"Use DGC(Deep Gradient Compression) or not"
${
WITH_DISTRIBUTE
}
)
option
(
SANITIZER_TYPE
"Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined"
OFF
)
option
(
SANITIZER_TYPE
"Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined"
OFF
)
option
(
WITH_LITE
"Compile Paddle Fluid with Lite Engine"
OFF
)
option
(
WITH_CINN
"Compile PaddlePaddle with CINN"
OFF
)
option
(
WITH_INFRT
"Compile PaddlePaddle with INFRT"
OFF
)
...
...
@@ -239,19 +278,29 @@ option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
option
(
WITH_MUSL
"Compile with musl libc instead of gblic"
OFF
)
option
(
WITH_UNITY_BUILD
"Compile with UnityBuild mode"
OFF
)
option
(
WITH_STRIP
"Strip so files of Whl packages"
OFF
)
option
(
NEW_RELEASE_PYPI
"PaddlePaddle next-level release strategy for pypi cubin package"
OFF
)
option
(
NEW_RELEASE_ALL
"PaddlePaddle next-level release strategy for all arches cubin package"
OFF
)
option
(
NEW_RELEASE_JIT
"PaddlePaddle next-level release strategy for backup jit package"
OFF
)
option
(
NEW_RELEASE_PYPI
"PaddlePaddle next-level release strategy for pypi cubin package"
OFF
)
option
(
NEW_RELEASE_ALL
"PaddlePaddle next-level release strategy for all arches cubin package"
OFF
)
option
(
NEW_RELEASE_JIT
"PaddlePaddle next-level release strategy for backup jit package"
OFF
)
option
(
WITH_ASCEND_INT64
"Compile with int64 kernel for ascend NPU"
OFF
)
option
(
WITH_POCKETFFT
"Compile with pocketfft support"
ON
)
option
(
WITH_RECORD_BUILDTIME
"Compile PaddlePaddle with record all targets build time"
OFF
)
option
(
WITH_RECORD_BUILDTIME
"Compile PaddlePaddle with record all targets build time"
OFF
)
option
(
WITH_CUSTOM_DEVICE
"Compile with custom device support"
OFF
)
if
(
WITH_RECORD_BUILDTIME
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_COMPILE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_LINK
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_COMPILE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_LINK
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
else
()
include
(
ccache
)
# set ccache for compilation ; if WITH_RECORD_BUILDTIME=ON can't use ccache
include
(
ccache
)
# set ccache for compilation ; if WITH_RECORD_BUILDTIME=ON can't use ccache
endif
()
unset
(
WITH_RECORD_BUILDTIME CACHE
)
...
...
@@ -261,59 +310,70 @@ if(NOT PY_VERSION)
endif
()
set
(
PYBIND11_PYTHON_VERSION
${
PY_VERSION
}
)
# the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF
if
(
SANITIZER_TYPE AND NOT
"
${
SANITIZER_TYPE
}
"
MATCHES
"^(Address|Leak|Memory|Thread|Undefined)$"
)
if
(
SANITIZER_TYPE AND NOT
"
${
SANITIZER_TYPE
}
"
MATCHES
"^(Address|Leak|Memory|Thread|Undefined)$"
)
message
(
"Choose the correct type of sanitizer"
)
return
()
endif
()
if
(
LINUX AND NOT WITH_CUSTOM_DEVICE AND NOT ON_INFER
)
set
(
WITH_CUSTOM_DEVICE ON
)
if
(
LINUX
AND NOT WITH_CUSTOM_DEVICE
AND NOT ON_INFER
)
set
(
WITH_CUSTOM_DEVICE ON
)
endif
()
if
(
WIN32
)
if
(
WITH_DISTRIBUTE
)
MESSAGE
(
WARNING
"Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF."
)
set
(
WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
message
(
WARNING
"Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF."
)
set
(
WITH_DISTRIBUTE
OFF
CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
endif
()
if
(
WITH_NCCL
)
MESSAGE
(
WARNING
"Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL OFF CACHE STRING
"Disable NCCL when compiling for Windows"
FORCE
)
message
(
WARNING
"Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL
OFF
CACHE STRING
"Disable NCCL when compiling for Windows"
FORCE
)
endif
()
endif
()
if
(
NOT WITH_GPU AND WITH_NCCL
)
MESSAGE
(
WARNING
"Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL OFF CACHE STRING
"Disable NCCL when compiling without GPU"
FORCE
)
if
(
NOT WITH_GPU AND WITH_NCCL
)
message
(
WARNING
"Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL
OFF
CACHE STRING
"Disable NCCL when compiling without GPU"
FORCE
)
endif
()
# force WITH_XPU on when WITH_XPU_KP
if
(
WITH_XPU_KP AND NOT WITH_XPU
)
MESSAGE
(
WARNING
if
(
WITH_XPU_KP AND NOT WITH_XPU
)
message
(
WARNING
"Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON."
)
set
(
WITH_XPU ON CACHE STRING
"Enable WITH_XPU when compiling with WITH_XPU_KP"
FORCE
)
set
(
WITH_XPU
ON
CACHE STRING
"Enable WITH_XPU when compiling with WITH_XPU_KP"
FORCE
)
endif
()
if
(
NOT WITH_XPU AND WITH_XPU_BKCL
)
MESSAGE
(
WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF."
)
set
(
WITH_XPU_BKCL OFF CACHE STRING
"Disable BKCL when compiling without XPU"
FORCE
)
if
(
NOT WITH_XPU AND WITH_XPU_BKCL
)
message
(
WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF."
)
set
(
WITH_XPU_BKCL
OFF
CACHE STRING
"Disable BKCL when compiling without XPU"
FORCE
)
endif
()
# CNCL is only kept when MLU is enabled. When MLU is off but CNCL was
# requested, warn and force CNCL off, the same way the NCCL, BKCL and RCCL
# guards in this file disable the dependent option. This guard previously
# forced WITH_MLU (already OFF in this branch) and left WITH_CNCL enabled.
if(NOT WITH_MLU AND WITH_CNCL)
  message(
    WARNING "Disable CNCL when compiling without MLU. Force WITH_CNCL=OFF.")
  set(WITH_CNCL
      OFF
      CACHE STRING "Disable CNCL when compiling without MLU" FORCE)
endif()
if
(
WITH_NCCL
)
...
...
@@ -321,7 +381,10 @@ if(WITH_NCCL)
include
(
nccl
)
else
()
if
(
WITH_GPU
)
MESSAGE
(
WARNING
"If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used."
)
message
(
WARNING
"If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used."
)
endif
()
endif
()
...
...
@@ -332,7 +395,6 @@ if(WITH_BRPC_RDMA)
endif
()
endif
()
if
(
WITH_GPU
)
include
(
cuda
)
# lite subgraph compilation depends on CUDNN_ROOT,
...
...
@@ -358,11 +420,12 @@ if(WITH_XPU_KP)
include
(
xpu_kp
)
endif
()
if
(
NOT WITH_ROCM AND WITH_RCCL
)
MESSAGE
(
WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF."
)
set
(
WITH_RCCL OFF CACHE STRING
"Disable RCCL when compiling without ROCM"
FORCE
)
if
(
NOT WITH_ROCM AND WITH_RCCL
)
message
(
WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF."
)
set
(
WITH_RCCL
OFF
CACHE STRING
"Disable RCCL when compiling without ROCM"
FORCE
)
endif
()
if
(
WITH_RCCL
)
...
...
@@ -370,7 +433,10 @@ if(WITH_RCCL)
include
(
rccl
)
else
()
if
(
WITH_ROCM
)
MESSAGE
(
WARNING
"If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used."
)
message
(
WARNING
"If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used."
)
endif
()
endif
()
...
...
@@ -380,16 +446,23 @@ endif()
if
(
WITH_DISTRIBUTE
)
if
(
LINUX
)
set
(
WITH_GLOO ON CACHE STRING
"Enable GLOO when compiling WITH_DISTRIBUTE=ON."
FORCE
)
set
(
WITH_GLOO
ON
CACHE STRING
"Enable GLOO when compiling WITH_DISTRIBUTE=ON."
FORCE
)
endif
()
if
(
WITH_ASCEND_CL
)
# disable WITH_PSCORE for NPU before include third_party
MESSAGE
(
WARNING
"Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF."
)
set
(
WITH_PSCORE OFF CACHE BOOL
"Disable WITH_PSCORE when compiling with NPU"
FORCE
)
message
(
WARNING
"Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF."
)
set
(
WITH_PSCORE
OFF
CACHE BOOL
"Disable WITH_PSCORE when compiling with NPU"
FORCE
)
endif
()
endif
()
include
(
third_party
)
# download, build, install third_party, Contains about 20+ dependencies
include
(
third_party
)
# download, build, install third_party, Contains about 20+ dependencies
include
(
flags
)
# set paddle compile flags
...
...
@@ -407,38 +480,52 @@ include(configure) # add paddle env configuration
include_directories
(
"
${
PADDLE_SOURCE_DIR
}
"
)
if
(
WITH_NV_JETSON
)
set
(
WITH_ARM ON CACHE STRING
"Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON."
FORCE
)
set
(
WITH_ARM
ON
CACHE STRING
"Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON."
FORCE
)
endif
()
# ARM builds: compile position-independent code, force XBYAK, MKL and AVX off
# in the cache, and define PADDLE_WITH_ARM for all sources.
if(WITH_ARM)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
  set(WITH_XBYAK
      OFF
      CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON." FORCE)
  set(WITH_MKL
      OFF
      CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE)
  # The cache docstring used to read "WITH_AVX=OFF"; the trigger for this
  # override is WITH_ARM=ON, like the two entries above.
  set(WITH_AVX
      OFF
      CACHE STRING "Disable AVX when compiling WITH_ARM=ON." FORCE)
  add_definitions(-DPADDLE_WITH_ARM)
endif()
if
(
WITH_SW
)
if
(
WITH_SW
)
# mieee flag solves floating-point exceptions under sw and ALPHA architectures
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
-fPIC -mieee"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-fPIC -mieee"
)
set
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK when compiling WITH_SW=ON"
FORCE
)
set
(
WITH_MKL OFF CACHE STRING
"Disable MKL when compiling WITH_SW=ON."
FORCE
)
set
(
WITH_XBYAK
OFF
CACHE STRING
"Disable XBYAK when compiling WITH_SW=ON"
FORCE
)
set
(
WITH_MKL
OFF
CACHE STRING
"Disable MKL when compiling WITH_SW=ON."
FORCE
)
add_definitions
(
-DPADDLE_WITH_SW
)
endif
()
if
(
WITH_MIPS
)
set
(
WITH_XBYAK OFF CACHE STRING
"Disable XBYAK when compiling WITH_MIPS=ON"
FORCE
)
if
(
WITH_MIPS
)
set
(
WITH_XBYAK
OFF
CACHE STRING
"Disable XBYAK when compiling WITH_MIPS=ON"
FORCE
)
add_definitions
(
-DPADDLE_WITH_MIPS
)
endif
()
if
(
WITH_ONEMKL
)
if
(
WITH_ONEMKL
)
add_definitions
(
-DPADDLE_WITH_ONEMKL
)
endif
()
if
(
WITH_HETERPS
)
if
(
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0
)
if
(
WITH_HETERPS
)
if
(
CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-faligned-new"
)
endif
()
endif
()
...
...
@@ -451,18 +538,25 @@ add_definitions(-DPADDLE_DLL_EXPORT)
if
(
ON_INFER
)
# you can turn off the paddle fluid and inference lib by setting ON_INFER=OFF
message
(
STATUS
"On inference mode, will take place some specific optimization."
)
message
(
STATUS
"On inference mode, will take place some specific optimization."
)
include
(
inference_lib
)
add_definitions
(
-DPADDLE_ON_INFERENCE
)
else
()
#TODO(luotao), combine this warning with `make inference_lib_dist` command.
message
(
WARNING
"On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only."
)
message
(
WARNING
"On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only."
)
endif
()
if
(
WITH_STRIP
)
find_program
(
STRIP_PATH strip
)
if
(
NOT STRIP_PATH OR NOT LINUX
)
set
(
WITH_STRIP OFF CACHE STRING
"Command strip is only used on Linux when it exists."
FORCE
)
set
(
WITH_STRIP
OFF
CACHE STRING
"Command strip is only used on Linux when it exists."
FORCE
)
endif
()
endif
()
...
...
cmake/experimental.cmake
0 → 100644
浏览文件 @
210fa777
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options
# Pull in each experimental build option; currently only the lazy CUDA module
# loading experiment is included.
include(experiments/cuda_module_loading_lazy)
cmake/experiments/cuda_module_loading_lazy.cmake
0 → 100644
浏览文件 @
210fa777
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options for lazy cuda module loading
# CUDA module lazy loading is supported natively by CUDA 11.7+
# this experimental option makes Paddle support lazy loading with CUDA versions before 11.7.
# Experimental lazy CUDA module loading for CUDA toolkits older than 11.7.
#
# On Linux with ON_INFER enabled and CUDA found, this generates the wrapper
# script tools/nvcc_lazy by running tools/nvcc_lazy.sh, then points both
# CUDA_NVCC_EXECUTABLE and CMAKE_CUDA_COMPILER at that wrapper. In every other
# configuration it prints a notice and returns from this file.
if(LINUX)
  # The conditions below use the variable-name form (or a quoted expansion)
  # instead of a bare ${VAR}. A bare expansion of an empty or undefined
  # variable removes the operand and leaves a malformed condition; this file is
  # included before option(ON_INFER ...) is declared, so that can happen.
  if(NOT ON_INFER OR NOT LINUX)
    message(
      "EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms"
    )
    return()
  endif()
  if(NOT CUDA_FOUND)
    message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
    return()
  endif()
  if("${CUDA_VERSION}" VERSION_GREATER_EQUAL "11.7")
    message("cuda 11.7+ already support lazy module loading")
    return()
  endif()

  message(
    "for cuda before 11.7, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a"
  )
  set(CUDA_USE_STATIC_CUDA_RUNTIME
      OFF
      CACHE BOOL "" FORCE)
  # NOTE(review): this replaces any CMAKE_CUDA_FLAGS value set earlier rather
  # than appending to it - confirm that is intended.
  set(CMAKE_CUDA_FLAGS "--cudart shared")
  enable_language(CUDA)

  set(nvcc_lazy_generator "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh")
  set(nvcc_lazy_wrapper "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")

  # These steps must run strictly one after another. Listing several COMMANDs
  # in a single execute_process() runs them concurrently as a pipeline, which
  # let the removal of the old wrapper race with the generator appending to the
  # very same file.
  # 1. Remove any stale wrapper, because the generator appends to its output.
  file(REMOVE_RECURSE "${nvcc_lazy_wrapper}")
  # 2. Make the generator executable (kept from the original sequence).
  execute_process(COMMAND "chmod" "755" "${nvcc_lazy_generator}")
  # 3. Generate the wrapper; stop here if that fails, since the compiler
  #    variables below would otherwise point at a missing or partial script.
  execute_process(
    COMMAND "bash" "${nvcc_lazy_generator}" "${nvcc_lazy_wrapper}"
            "${CUDA_TOOLKIT_ROOT_DIR}"
    RESULT_VARIABLE nvcc_lazy_result)
  if(NOT nvcc_lazy_result EQUAL 0)
    message(
      FATAL_ERROR
        "Failed to generate ${nvcc_lazy_wrapper} (exit status: ${nvcc_lazy_result})"
    )
  endif()
  # 4. Make the generated wrapper executable.
  execute_process(COMMAND "chmod" "755" "${nvcc_lazy_wrapper}")

  set(CUDA_NVCC_EXECUTABLE
      "${nvcc_lazy_wrapper}"
      CACHE FILEPATH "" FORCE)
  set(CMAKE_CUDA_COMPILER
      "${nvcc_lazy_wrapper}"
      CACHE FILEPATH "" FORCE)

  # Do not leak the helper variables into the including scope.
  unset(nvcc_lazy_generator)
  unset(nvcc_lazy_wrapper)
  unset(nvcc_lazy_result)
endif()
tools/nvcc_lazy.sh
0 → 100644
浏览文件 @
210fa777
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generator script: appends the text of an nvcc wrapper script to the file
# named by the first argument ($1). The second argument ($2) is used further
# down as the CUDA toolkit root ($2/bin/nvcc and $2/nvvm/bin).
# Every line is written with ">>", so $1 should not exist before this script
# runs; otherwise the new content is appended after the old content.
#
# Wrapper preamble: shebang, "unset GREP_OPTIONS", "set -e", then a blank line
# ("echo -e" with no text prints an empty line).
echo
"#!/usr/bin/env bash"
>>
$1
echo
"unset GREP_OPTIONS"
>>
$1
echo
"set -e"
>>
$1
echo
-e
>>
$1
echo
"# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved."
>>
$1
echo
"#"
>>
$1
echo
"# Licensed under the Apache License, Version 2.0 (the
\"
License
\"
);"
>>
$1
echo
"# you may not use this file except in compliance with the License."
>>
$1
echo
"# You may obtain a copy of the License at"
>>
$1
echo
"#"
>>
$1
echo
"# http://www.apache.org/licenses/LICENSE-2.0"
>>
$1
echo
"#"
>>
$1
echo
"# Unless required by applicable law or agreed to in writing, software"
>>
$1
echo
"# distributed under the License is distributed on an
\"
AS IS
\"
BASIS,"
>>
$1
echo
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied."
>>
$1
echo
"# See the License for the specific language governing permissions and"
>>
$1
echo
"# limitations under the License."
>>
$1
echo
-e
>>
$1
echo
-e
>>
$1
echo
"## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY"
>>
$1
echo
-e
>>
$1
echo
"# set cicc PATH for Centos"
>>
$1
echo
"export PATH=
\$
PATH:
$2
/nvvm/bin"
>>
$1
echo
-e
>>
$1
echo
"# check nvcc version, if nvcc >= 11.7, just run nvcc itself"
>>
$1
echo
"CUDA_VERSION=
\$
(nvcc --version | grep -oP '(?<=V)
\d
*
\.\d
*')"
>>
$1
echo
"CUDA_VERSION_MAJOR=
\$
{CUDA_VERSION%.*}"
>>
$1
echo
"CUDA_VERSION_MINOR=
\$
{CUDA_VERSION#*.}"
>>
$1
echo
"if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 7) )); then"
>>
$1
echo
" nvcc
\"\$
@
\"
"
>>
$1
echo
" exit"
>>
$1
echo
"fi"
>>
$1
echo
-e
>>
$1
echo
"BUILDDIR=
\$
(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX)"
>>
$1
echo
"echo
\"\$
@
\"
>
\$
{BUILDDIR}/args"
>>
$1
echo
"BUILDSH=
\$
{BUILDDIR}/build.sh"
>>
$1
echo
"
$2
/bin/nvcc --dryrun --keep --keep-dir=
\$
{BUILDDIR}
\"\$
@
\"
2>&1 | sed -e 's/#
\\
$
//;/^rm/d' >
\$
BUILDSH"
>>
$1
echo
"sed -i -e '/^
\s
*--/d'
\$
BUILDSH"
>>
$1
echo
"sed -ne '1,/^cicc.*cudafe1.stub.c/p'
\$
{BUILDSH} >
\$
{BUILDSH}.pre"
>>
$1
echo
"sed -e '1,/^cicc.*cudafe1.stub.c/d'
\$
{BUILDSH} >
\$
{BUILDSH}.post"
>>
$1
echo
-e
>>
$1
echo
"sed -i -e '/LIBRARIES=/{s/
\s
//g;s/
\"\"
/ /g}'
\$
{BUILDSH}.pre"
>>
$1
echo
-e
>>
$1
echo
"/usr/bin/env bash
\$
{BUILDSH}.pre"
>>
$1
echo
"STUBF=
\$
(find
\$
BUILDDIR -name *.cudafe1.stub.c)"
>>
$1
echo
"CUFILE=
\$
(basename -s '.cudafe1.stub.c'
\$
STUBF)"
>>
$1
echo
"sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);'
\$
STUBF"
>>
$1
echo
"sed -i -e 's/__sti____cudaRegisterAll
\(
.*{
\)
/__do____cudaRegisterAll
\1
/'
\$
STUBF"
>>
$1
echo
"# sed -i -e
\"
/__do____cudaRegisterAll
\(
.*{
\)
/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(
\\\"
CUDA_MODULE_LOADING
\\\"
); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; fprintf(stderr,
\\\"
===>
\$
{CUFILE} lazy-load? %d
\\\\\\\\
n
\\\"
, l); __do____cudaRegisterAll();}
\"
\$
STUBF"
>>
$1
echo
"sed -i -e
\"
/__do____cudaRegisterAll
\(
.*{
\)
/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(
\\\"
CUDA_MODULE_LOADING
\\\"
); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}
\"
\$
STUBF"
>>
$1
echo
"sed -i -e '/__try____cudaRegisterAll
\(
.*{
\)
/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}'
\$
STUBF"
>>
$1
echo
"sed -i -e 's/{
\(
__device_stub__
\)
/{__try____cudaRegisterAll(1);
\1
/'
\$
STUBF"
>>
$1
echo
"/usr/bin/env bash
\$
{BUILDSH}.post"
>>
$1
echo
"rm -rf
\$
BUILDDIR"
>>
$1
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录