Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
210fa777
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
210fa777
编写于
8月 08, 2022
作者:
X
xiaoxiaohehe001
提交者:
GitHub
8月 08, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
nvcclazylinux (#44957)
上级
30b66f03
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
537 addition
and
301 deletion
+537
-301
CMakeLists.txt
CMakeLists.txt
+395
-301
cmake/experimental.cmake
cmake/experimental.cmake
+17
-0
cmake/experiments/cuda_module_loading_lazy.cmake
cmake/experiments/cuda_module_loading_lazy.cmake
+55
-0
tools/nvcc_lazy.sh
tools/nvcc_lazy.sh
+70
-0
未找到文件。
CMakeLists.txt
浏览文件 @
210fa777
...
...
@@ -13,12 +13,12 @@
# limitations under the License
if
(
APPLE AND WITH_ARM
)
# CMake supports Apple M1 starting from version 3.19.2
cmake_minimum_required
(
VERSION 3.19.2
)
cmake_policy
(
VERSION 3.19.2
)
# CMake supports Apple M1 starting from version 3.19.2
cmake_minimum_required
(
VERSION 3.19.2
)
cmake_policy
(
VERSION 3.19.2
)
else
(
APPLE AND WITH_ARM
)
cmake_minimum_required
(
VERSION 3.15
)
cmake_policy
(
VERSION 3.10
)
cmake_minimum_required
(
VERSION 3.15
)
cmake_policy
(
VERSION 3.10
)
endif
(
APPLE AND WITH_ARM
)
set
(
CMAKE_MODULE_PATH
${
CMAKE_MODULE_PATH
}
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/cmake"
)
set
(
PADDLE_SOURCE_DIR
${
CMAKE_CURRENT_SOURCE_DIR
}
)
...
...
@@ -28,9 +28,12 @@ include(system)
# Note(zhouwei): Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if
(
NOT CMAKE_BUILD_TYPE
)
set
(
CMAKE_BUILD_TYPE
"Release"
CACHE STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE
)
set
(
CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE
)
endif
()
project
(
paddle CXX C
)
...
...
@@ -39,152 +42,181 @@ project(paddle CXX C)
# TODO(Shibo Tao): remove find_package(CUDA) completely.
find_package
(
CUDA QUIET
)
find_package
(
MKL CONFIG QUIET
)
option
(
WITH_ONEMKL
"Compile PaddlePaddle with oneMKL"
OFF
)
option
(
WITH_GPU
"Compile PaddlePaddle with NVIDIA GPU"
${
CUDA_FOUND
}
)
option
(
WITH_TENSORRT
"Compile PaddlePaddle with NVIDIA TensorRT"
OFF
)
option
(
WITH_XPU
"Compile PaddlePaddle with BAIDU KUNLUN XPU"
OFF
)
option
(
WITH_XPU_KP
"Compile PaddlePaddle with BAIDU XPU compiler "
OFF
)
option
(
WITH_MLU
"Compile PaddlePaddle with CAMBRICON MLU"
OFF
)
option
(
WITH_WIN_DUMP_DBG
"Compile with windows core dump debug mode"
OFF
)
option
(
WITH_ASCEND
"Compile PaddlePaddle with ASCEND"
OFF
)
option
(
WITH_ROCM
"Compile PaddlePaddle with ROCM platform"
OFF
)
option
(
WITH_IPU
"Compile PaddlePaddle with Graphcore IPU"
OFF
)
option
(
WITH_ONEMKL
"Compile PaddlePaddle with oneMKL"
OFF
)
option
(
WITH_GPU
"Compile PaddlePaddle with NVIDIA GPU"
${
CUDA_FOUND
}
)
option
(
WITH_TENSORRT
"Compile PaddlePaddle with NVIDIA TensorRT"
OFF
)
option
(
WITH_XPU
"Compile PaddlePaddle with BAIDU KUNLUN XPU"
OFF
)
option
(
WITH_XPU_KP
"Compile PaddlePaddle with BAIDU XPU compiler "
OFF
)
option
(
WITH_MLU
"Compile PaddlePaddle with CAMBRICON MLU"
OFF
)
option
(
WITH_WIN_DUMP_DBG
"Compile with windows core dump debug mode"
OFF
)
option
(
WITH_ASCEND
"Compile PaddlePaddle with ASCEND"
OFF
)
option
(
WITH_ROCM
"Compile PaddlePaddle with ROCM platform"
OFF
)
option
(
WITH_IPU
"Compile PaddlePaddle with Graphcore IPU"
OFF
)
# NOTE(zhiqiu): WITH_ASCEND_CL can be compiled on x86_64, so we can set WITH_ASCEND=OFF and WITH_ASCEND_CL=ON
# to develop some ACL-related functionality on x86.
option
(
WITH_ASCEND_CL
"Compile PaddlePaddle with ASCEND CL"
${
WITH_ASCEND
}
)
option
(
WITH_ASCEND_CXX11
"Compile PaddlePaddle with ASCEND and CXX11 ABI"
OFF
)
option
(
WITH_ONNXRUNTIME
"Compile PaddlePaddle with ONNXRUNTIME"
OFF
)
option
(
WITH_ASCEND_CL
"Compile PaddlePaddle with ASCEND CL"
${
WITH_ASCEND
}
)
option
(
WITH_ASCEND_CXX11
"Compile PaddlePaddle with ASCEND and CXX11 ABI"
OFF
)
option
(
WITH_ONNXRUNTIME
"Compile PaddlePaddle with ONNXRUNTIME"
OFF
)
# Note(zhouwei): this uses the options above, so it is placed here
include
(
init
)
include
(
generic
)
# simplify cmake module
include
(
generic
)
# simplify cmake module
include
(
experimental
)
# experimental build options
if
(
WITH_GPU
AND WITH_XPU
)
message
(
FATAL_ERROR
"Error when compile GPU and XPU at the same time"
)
if
(
WITH_GPU
AND WITH_XPU
)
message
(
FATAL_ERROR
"Error when compile GPU and XPU at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_XPU_KP
)
message
(
FATAL_ERROR
"Error when compile GPU and XPU2 at the same time"
)
if
(
WITH_GPU AND WITH_XPU_KP
)
message
(
FATAL_ERROR
"Error when compile GPU and XPU2 at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_ASCEND
)
message
(
FATAL_ERROR
"Error when compile GPU and ASCEND at the same time"
)
if
(
WITH_GPU AND WITH_ASCEND
)
message
(
FATAL_ERROR
"Error when compile GPU and ASCEND at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_ROCM
)
message
(
FATAL_ERROR
"Error when compile CUDA and ROCM at the same time"
)
if
(
WITH_GPU AND WITH_ROCM
)
message
(
FATAL_ERROR
"Error when compile CUDA and ROCM at the same time"
)
endif
()
if
(
WITH_GPU AND WITH_MLU
)
message
(
FATAL_ERROR
"Error when compile GPU and MLU at the same time"
)
if
(
WITH_GPU AND WITH_MLU
)
message
(
FATAL_ERROR
"Error when compile GPU and MLU at the same time"
)
endif
()
if
(
WITH_GPU AND NOT APPLE
)
enable_language
(
CUDA
)
message
(
STATUS
"CUDA compiler:
${
CMAKE_CUDA_COMPILER
}
, version: "
"
${
CMAKE_CUDA_COMPILER_ID
}
${
CMAKE_CUDA_COMPILER_VERSION
}
"
)
enable_language
(
CUDA
)
message
(
STATUS
"CUDA compiler:
${
CMAKE_CUDA_COMPILER
}
, version: "
"
${
CMAKE_CUDA_COMPILER_ID
}
${
CMAKE_CUDA_COMPILER_VERSION
}
"
)
endif
()
message
(
STATUS
"CXX compiler:
${
CMAKE_CXX_COMPILER
}
, version: "
"
${
CMAKE_CXX_COMPILER_ID
}
${
CMAKE_CXX_COMPILER_VERSION
}
"
)
"
${
CMAKE_CXX_COMPILER_ID
}
${
CMAKE_CXX_COMPILER_VERSION
}
"
)
message
(
STATUS
"C compiler:
${
CMAKE_C_COMPILER
}
, version: "
"
${
CMAKE_C_COMPILER_ID
}
${
CMAKE_C_COMPILER_VERSION
}
"
)
"
${
CMAKE_C_COMPILER_ID
}
${
CMAKE_C_COMPILER_VERSION
}
"
)
message
(
STATUS
"AR tools:
${
CMAKE_AR
}
"
)
# MUSL build turn off warnings
if
(
WITH_MUSL
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations -Wno-error=pessimizing-move -Wno-error=deprecated-copy"
)
endif
()
# Apple Silicon build: append the arm64 target triple to both the C and the
# C++ compiler flags.
if(APPLE AND WITH_ARM)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -target arm64-apple-darwin")
  # Extend CMAKE_CXX_FLAGS from its own current value. Building it from
  # CMAKE_C_FLAGS would drop any C++ flags already set and would repeat the
  # -target option that was just appended to the C flags.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -target arm64-apple-darwin")
endif()
if
(
WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-D_GLIBCXX_USE_CXX11_ABI=0"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-D_GLIBCXX_USE_CXX11_ABI=0"
)
endif
()
if
(
WIN32
)
option
(
MSVC_STATIC_CRT
"use static C Runtime library by default"
ON
)
set
(
CMAKE_SUPPRESS_REGENERATION ON
)
set
(
CMAKE_STATIC_LIBRARY_PREFIX lib
)
set
(
CMAKE_C_FLAGS_DEBUG
"
${
CMAKE_C_FLAGS_DEBUG
}
/bigobj"
)
set
(
CMAKE_C_FLAGS_RELEASE
"
${
CMAKE_C_FLAGS_RELEASE
}
/bigobj"
)
set
(
CMAKE_CXX_FLAGS_DEBUG
"
${
CMAKE_CXX_FLAGS_DEBUG
}
/bigobj"
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
/bigobj"
)
if
(
"
${
CMAKE_GENERATOR
}
"
STREQUAL
"Ninja"
)
set
(
CMAKE_C_FLAGS_DEBUG
"
${
CMAKE_C_FLAGS_DEBUG
}
/Zc:inline"
)
set
(
CMAKE_C_FLAGS_RELEASE
"
${
CMAKE_C_FLAGS_RELEASE
}
/Zc:inline"
)
set
(
CMAKE_CXX_FLAGS_DEBUG
"
${
CMAKE_CXX_FLAGS_DEBUG
}
/Zc:inline"
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
/Zc:inline"
)
option
(
MSVC_STATIC_CRT
"use static C Runtime library by default"
ON
)
set
(
CMAKE_SUPPRESS_REGENERATION ON
)
set
(
CMAKE_STATIC_LIBRARY_PREFIX lib
)
set
(
CMAKE_C_FLAGS_DEBUG
"
${
CMAKE_C_FLAGS_DEBUG
}
/bigobj"
)
set
(
CMAKE_C_FLAGS_RELEASE
"
${
CMAKE_C_FLAGS_RELEASE
}
/bigobj"
)
set
(
CMAKE_CXX_FLAGS_DEBUG
"
${
CMAKE_CXX_FLAGS_DEBUG
}
/bigobj"
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
/bigobj"
)
if
(
"
${
CMAKE_GENERATOR
}
"
STREQUAL
"Ninja"
)
set
(
CMAKE_C_FLAGS_DEBUG
"
${
CMAKE_C_FLAGS_DEBUG
}
/Zc:inline"
)
set
(
CMAKE_C_FLAGS_RELEASE
"
${
CMAKE_C_FLAGS_RELEASE
}
/Zc:inline"
)
set
(
CMAKE_CXX_FLAGS_DEBUG
"
${
CMAKE_CXX_FLAGS_DEBUG
}
/Zc:inline"
)
set
(
CMAKE_CXX_FLAGS_RELEASE
"
${
CMAKE_CXX_FLAGS_RELEASE
}
/Zc:inline"
)
endif
()
if
(
MSVC_STATIC_CRT
)
message
(
STATUS
"Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019"
)
foreach
(
flag_var
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO
)
if
(
${
flag_var
}
MATCHES
"/MD"
)
string
(
REGEX REPLACE
"/MD"
"/MT"
${
flag_var
}
"
${${
flag_var
}}
"
)
endif
()
endforeach
(
flag_var
)
endif
()
# NOTE(zhouwei): msvc max/min macro conflict with std::min/max, define NOMINMAX globally
add_definitions
(
"-DNOMINMAX"
)
# windows build turn off warnings, use parallel compiling.
foreach
(
flag_var
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL
CMAKE_C_FLAGS_RELWITHDEBINFO
)
string
(
REGEX REPLACE
"/W[1-4]"
" /W0 "
${
flag_var
}
"
${${
flag_var
}}
"
)
# NOTE(zhouwei25): GPU compilation uses too much memory when compiling in parallel.
# For Visual Studio generators, /MP should be added.
# For other generators like Ninja, there is no need to add /MP.
if
(
CMAKE_GENERATOR MATCHES
"Visual Studio"
AND NOT WITH_GPU
)
math
(
EXPR PROCESS_MAX
"
${
CPU_CORES
}
* 2 / 3"
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/MP
${
PROCESS_MAX
}
"
)
endif
()
if
(
MSVC_STATIC_CRT
)
message
(
STATUS
"Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019"
)
foreach
(
flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
)
if
(
${
flag_var
}
MATCHES
"/MD"
)
string
(
REGEX REPLACE
"/MD"
"/MT"
${
flag_var
}
"
${${
flag_var
}}
"
)
endif
()
endforeach
(
flag_var
)
endforeach
(
flag_var
)
foreach
(
flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/w"
)
endforeach
(
flag_var
)
# Windows Remove /Zi, /ZI for Release, MinSizeRel builds
foreach
(
flag_var
CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL
)
if
(
${
flag_var
}
MATCHES
"/Z[iI]"
)
string
(
REGEX REPLACE
"/Z[iI]"
""
${
flag_var
}
"
${${
flag_var
}}
"
)
endif
()
endforeach
(
flag_var
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221"
)
if
(
MSVC_STATIC_CRT
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/NODEFAULTLIB:MSVCRT.LIB"
)
endif
()
endforeach
(
flag_var
)
# NOTE(zhouwei): msvc max/min macro conflict with std::min/max, define NOMINMAX globally
add_definitions
(
"-DNOMINMAX"
)
# windows build turn off warnings, use parallel compiling.
foreach
(
flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
CMAKE_C_FLAGS CMAKE_C_FLAGS_DEBUG CMAKE_C_FLAGS_RELEASE
CMAKE_C_FLAGS_MINSIZEREL CMAKE_C_FLAGS_RELWITHDEBINFO
)
string
(
REGEX REPLACE
"/W[1-4]"
" /W0 "
${
flag_var
}
"
${${
flag_var
}}
"
)
# NOTE(zhouwei25): GPU compilation uses too much memory when compiling in parallel.
# For Visual Studio generators, /MP should be added.
# For other generators like Ninja, there is no need to add /MP.
if
(
CMAKE_GENERATOR MATCHES
"Visual Studio"
AND NOT WITH_GPU
)
math
(
EXPR PROCESS_MAX
"
${
CPU_CORES
}
* 2 / 3"
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/MP
${
PROCESS_MAX
}
"
)
endif
()
endforeach
(
flag_var
)
foreach
(
flag_var CMAKE_CXX_FLAGS CMAKE_C_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/w"
)
endforeach
(
flag_var
)
# Windows Remove /Zi, /ZI for Release, MinSizeRel builds
foreach
(
flag_var
CMAKE_C_FLAGS CMAKE_C_FLAGS_RELEASE CMAKE_C_FLAGS_MINSIZEREL
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL
)
if
(
${
flag_var
}
MATCHES
"/Z[iI]"
)
string
(
REGEX REPLACE
"/Z[iI]"
""
${
flag_var
}
"
${${
flag_var
}}
"
)
endif
()
endforeach
(
flag_var
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/wd4068 /wd4129 /wd4244 /wd4267 /wd4297 /wd4530 /wd4577 /wd4819 /wd4838"
)
if
(
WITH_WIN_DUMP_DBG
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/Zi"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/Zi"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/ignore:4049 /ignore:4217 /ignore:4006 /ignore:4221"
)
if
(
MSVC_STATIC_CRT
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/NODEFAULTLIB:MSVCRT.LIB"
)
endif
()
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS
CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/DEBUG /OPT:REF /OPT:ICF"
)
endforeach
(
flag_var
)
if
(
WITH_WIN_DUMP_DBG
)
set
(
CMAKE_C_FLAGS
"
${
CMAKE_C_FLAGS
}
/Zi"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
/Zi"
)
foreach
(
flag_var CMAKE_SHARED_LINKER_FLAGS CMAKE_STATIC_LINKER_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_LINKER_FLAGS
)
set
(
${
flag_var
}
"
${${
flag_var
}}
/DEBUG /OPT:REF /OPT:ICF"
)
endforeach
(
flag_var
)
add_definitions
(
"-DWITH_WIN_DUMP_DBG"
)
endif
()
add_definitions
(
"-DWITH_WIN_DUMP_DBG"
)
endif
()
else
(
WIN32
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-Wno-error=deprecated-declarations -Wno-deprecated-declarations"
)
endif
(
WIN32
)
find_package
(
Git REQUIRED
)
...
...
@@ -192,7 +224,7 @@ find_package(Git REQUIRED)
# config GIT_URL with github mirrors to speed up dependent repos clone
option
(
GIT_URL
"Git URL to clone dependent repos"
${
GIT_URL
}
)
if
(
NOT GIT_URL
)
set
(
GIT_URL
"https://github.com"
)
set
(
GIT_URL
"https://github.com"
)
endif
()
find_package
(
Threads REQUIRED
)
...
...
@@ -200,58 +232,75 @@ find_package(Threads REQUIRED)
include
(
simd
)
################################ Exposed Configurations #######################################
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_PYTHON
"Compile PaddlePaddle with python interpreter"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
OFF
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_SYSTEM_BLAS
"Use system blas library"
OFF
)
option
(
WITH_DISTRIBUTE
"Compile with distributed support"
OFF
)
option
(
WITH_BRPC_RDMA
"Use brpc rdma as the rpc protocal"
OFF
)
option
(
ON_INFER
"Turn on inference optimization and inference-lib generation"
OFF
)
option
(
WITH_AVX
"Compile PaddlePaddle with AVX intrinsics"
${
AVX_FOUND
}
)
option
(
WITH_PYTHON
"Compile PaddlePaddle with python interpreter"
ON
)
option
(
WITH_TESTING
"Compile PaddlePaddle with unit testing"
OFF
)
option
(
WITH_MKL
"Compile PaddlePaddle with MKL support."
${
AVX_FOUND
}
)
option
(
WITH_SYSTEM_BLAS
"Use system blas library"
OFF
)
option
(
WITH_DISTRIBUTE
"Compile with distributed support"
OFF
)
option
(
WITH_BRPC_RDMA
"Use brpc rdma as the rpc protocal"
OFF
)
option
(
ON_INFER
"Turn on inference optimization and inference-lib generation"
OFF
)
################################ Internal Configurations #######################################
option
(
WITH_NV_JETSON
"Compile PaddlePaddle with NV JETSON"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler and gperftools"
OFF
)
option
(
WITH_COVERAGE
"Compile PaddlePaddle with code coverage"
OFF
)
option
(
WITH_INCREMENTAL_COVERAGE
"Generate coverage reports only for incremental code"
OFF
)
OPTION
(
WITH_LIBXSMM
"Compile with libxsmm"
OFF
)
option
(
COVERALLS_UPLOAD
"Package code coverage data to coveralls"
OFF
)
option
(
WITH_PSLIB
"Compile with pslib support"
OFF
)
option
(
WITH_BOX_PS
"Compile with box_ps support"
OFF
)
option
(
WITH_XBYAK
"Compile with xbyak support"
ON
)
option
(
WITH_CONTRIB
"Compile the third-party contributation"
OFF
)
option
(
WITH_PSCORE
"Compile with parameter server support"
${
WITH_DISTRIBUTE
}
)
# Build switch for heterps support; disabled by default.
option(WITH_HETERPS "Compile with heterps" OFF)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference C++ high-level api interface"
OFF
)
option
(
PY_VERSION
"Compile PaddlePaddle with python3 support"
${
PY_VERSION
}
)
option
(
WITH_DGC
"Use DGC(Deep Gradient Compression) or not"
${
WITH_DISTRIBUTE
}
)
option
(
SANITIZER_TYPE
"Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined"
OFF
)
option
(
WITH_LITE
"Compile Paddle Fluid with Lite Engine"
OFF
)
option
(
WITH_CINN
"Compile PaddlePaddle with CINN"
OFF
)
option
(
WITH_INFRT
"Compile PaddlePaddle with INFRT"
OFF
)
option
(
WITH_NCCL
"Compile PaddlePaddle with NCCL support"
ON
)
option
(
WITH_RCCL
"Compile PaddlePaddle with RCCL support"
ON
)
option
(
WITH_XPU_BKCL
"Compile PaddlePaddle with BAIDU KUNLUN XPU BKCL"
OFF
)
option
(
WITH_CNCL
"Compile PaddlePaddle with CNCL support"
OFF
)
option
(
WITH_CRYPTO
"Compile PaddlePaddle with crypto support"
ON
)
option
(
WITH_ARM
"Compile PaddlePaddle with arm support"
OFF
)
option
(
WITH_SW
"Compile PaddlePaddle with sw support"
OFF
)
option
(
WITH_MIPS
"Compile PaddlePaddle with mips support"
OFF
)
option
(
WITH_MUSL
"Compile with musl libc instead of gblic"
OFF
)
option
(
WITH_UNITY_BUILD
"Compile with UnityBuild mode"
OFF
)
option
(
WITH_STRIP
"Strip so files of Whl packages"
OFF
)
option
(
NEW_RELEASE_PYPI
"PaddlePaddle next-level release strategy for pypi cubin package"
OFF
)
option
(
NEW_RELEASE_ALL
"PaddlePaddle next-level release strategy for all arches cubin package"
OFF
)
option
(
NEW_RELEASE_JIT
"PaddlePaddle next-level release strategy for backup jit package"
OFF
)
option
(
WITH_ASCEND_INT64
"Compile with int64 kernel for ascend NPU"
OFF
)
option
(
WITH_POCKETFFT
"Compile with pocketfft support"
ON
)
option
(
WITH_RECORD_BUILDTIME
"Compile PaddlePaddle with record all targets build time"
OFF
)
option
(
WITH_CUSTOM_DEVICE
"Compile with custom device support"
OFF
)
option
(
WITH_NV_JETSON
"Compile PaddlePaddle with NV JETSON"
OFF
)
option
(
WITH_PROFILER
"Compile PaddlePaddle with GPU profiler and gperftools"
OFF
)
option
(
WITH_COVERAGE
"Compile PaddlePaddle with code coverage"
OFF
)
option
(
WITH_INCREMENTAL_COVERAGE
"Generate coverage reports only for incremental code"
OFF
)
option
(
WITH_LIBXSMM
"Compile with libxsmm"
OFF
)
option
(
COVERALLS_UPLOAD
"Package code coverage data to coveralls"
OFF
)
option
(
WITH_PSLIB
"Compile with pslib support"
OFF
)
option
(
WITH_BOX_PS
"Compile with box_ps support"
OFF
)
option
(
WITH_XBYAK
"Compile with xbyak support"
ON
)
option
(
WITH_CONTRIB
"Compile the third-party contributation"
OFF
)
option
(
WITH_PSCORE
"Compile with parameter server support"
${
WITH_DISTRIBUTE
}
)
# Build switch for heterps support; disabled by default.
option(WITH_HETERPS "Compile with heterps" OFF)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference C++ high-level api interface"
OFF
)
option
(
PY_VERSION
"Compile PaddlePaddle with python3 support"
${
PY_VERSION
}
)
option
(
WITH_DGC
"Use DGC(Deep Gradient Compression) or not"
${
WITH_DISTRIBUTE
}
)
option
(
SANITIZER_TYPE
"Choose the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined"
OFF
)
option
(
WITH_LITE
"Compile Paddle Fluid with Lite Engine"
OFF
)
option
(
WITH_CINN
"Compile PaddlePaddle with CINN"
OFF
)
option
(
WITH_INFRT
"Compile PaddlePaddle with INFRT"
OFF
)
option
(
WITH_NCCL
"Compile PaddlePaddle with NCCL support"
ON
)
option
(
WITH_RCCL
"Compile PaddlePaddle with RCCL support"
ON
)
option
(
WITH_XPU_BKCL
"Compile PaddlePaddle with BAIDU KUNLUN XPU BKCL"
OFF
)
option
(
WITH_CNCL
"Compile PaddlePaddle with CNCL support"
OFF
)
option
(
WITH_CRYPTO
"Compile PaddlePaddle with crypto support"
ON
)
option
(
WITH_ARM
"Compile PaddlePaddle with arm support"
OFF
)
option
(
WITH_SW
"Compile PaddlePaddle with sw support"
OFF
)
option
(
WITH_MIPS
"Compile PaddlePaddle with mips support"
OFF
)
option
(
WITH_MUSL
"Compile with musl libc instead of gblic"
OFF
)
option
(
WITH_UNITY_BUILD
"Compile with UnityBuild mode"
OFF
)
option
(
WITH_STRIP
"Strip so files of Whl packages"
OFF
)
option
(
NEW_RELEASE_PYPI
"PaddlePaddle next-level release strategy for pypi cubin package"
OFF
)
option
(
NEW_RELEASE_ALL
"PaddlePaddle next-level release strategy for all arches cubin package"
OFF
)
option
(
NEW_RELEASE_JIT
"PaddlePaddle next-level release strategy for backup jit package"
OFF
)
option
(
WITH_ASCEND_INT64
"Compile with int64 kernel for ascend NPU"
OFF
)
option
(
WITH_POCKETFFT
"Compile with pocketfft support"
ON
)
option
(
WITH_RECORD_BUILDTIME
"Compile PaddlePaddle with record all targets build time"
OFF
)
option
(
WITH_CUSTOM_DEVICE
"Compile with custom device support"
OFF
)
if
(
WITH_RECORD_BUILDTIME
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_COMPILE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_LINK
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
else
()
include
(
ccache
)
# use ccache for compilation; ccache cannot be used when WITH_RECORD_BUILDTIME=ON
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_COMPILE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
set_property
(
GLOBAL PROPERTY RULE_LAUNCH_LINK
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/tools/get_build_time.sh"
)
else
()
include
(
ccache
)
# use ccache for compilation; ccache cannot be used when WITH_RECORD_BUILDTIME=ON
endif
()
unset
(
WITH_RECORD_BUILDTIME CACHE
)
...
...
@@ -261,186 +310,224 @@ if(NOT PY_VERSION)
endif
()
set
(
PYBIND11_PYTHON_VERSION
${
PY_VERSION
}
)
# the type of sanitizer, options are: Address, Leak, Memory, Thread, Undefined. Default: OFF
if
(
SANITIZER_TYPE AND NOT
"
${
SANITIZER_TYPE
}
"
MATCHES
"^(Address|Leak|Memory|Thread|Undefined)$"
)
if
(
SANITIZER_TYPE AND NOT
"
${
SANITIZER_TYPE
}
"
MATCHES
"^(Address|Leak|Memory|Thread|Undefined)$"
)
message
(
"Choose the correct type of sanitizer"
)
return
()
endif
()
if
(
LINUX AND NOT WITH_CUSTOM_DEVICE AND NOT ON_INFER
)
set
(
WITH_CUSTOM_DEVICE ON
)
if
(
LINUX
AND NOT WITH_CUSTOM_DEVICE
AND NOT ON_INFER
)
set
(
WITH_CUSTOM_DEVICE ON
)
endif
()
if
(
WIN32
)
if
(
WITH_DISTRIBUTE
)
MESSAGE
(
WARNING
"Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF."
)
set
(
WITH_DISTRIBUTE OFF CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
endif
()
if
(
WITH_NCCL
)
MESSAGE
(
WARNING
"Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL OFF CACHE STRING
"Disable NCCL when compiling for Windows"
FORCE
)
endif
()
endif
()
if
(
NOT WITH_GPU AND WITH_NCCL
)
MESSAGE
(
WARNING
"Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL OFF CACHE STRING
"Disable NCCL when compiling without GPU"
FORCE
)
if
(
WITH_DISTRIBUTE
)
message
(
WARNING
"Disable DISTRIBUTE when compiling for Windows. Force WITH_DISTRIBUTE=OFF."
)
set
(
WITH_DISTRIBUTE
OFF
CACHE STRING
"Disable DISTRIBUTE when compiling for Windows"
FORCE
)
endif
()
if
(
WITH_NCCL
)
message
(
WARNING
"Disable NCCL when compiling for Windows. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL
OFF
CACHE STRING
"Disable NCCL when compiling for Windows"
FORCE
)
endif
()
endif
()
if
(
NOT WITH_GPU AND WITH_NCCL
)
message
(
WARNING
"Disable NCCL when compiling without GPU. Force WITH_NCCL=OFF."
)
set
(
WITH_NCCL
OFF
CACHE STRING
"Disable NCCL when compiling without GPU"
FORCE
)
endif
()
# force WITH_XPU on when WITH_XPU_KP
if
(
WITH_XPU_KP AND NOT WITH_XPU
)
MESSAGE
(
WARNING
"Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON."
)
set
(
WITH_XPU ON CACHE STRING
"Enable WITH_XPU when compiling with WITH_XPU_KP"
FORCE
)
if
(
WITH_XPU_KP AND NOT WITH_XPU
)
message
(
WARNING
"Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON."
)
set
(
WITH_XPU
ON
CACHE STRING
"Enable WITH_XPU when compiling with WITH_XPU_KP"
FORCE
)
endif
()
if
(
NOT WITH_XPU AND WITH_XPU_BKCL
)
MESSAGE
(
WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF."
)
set
(
WITH_XPU_BKCL OFF CACHE STRING
"Disable BKCL when compiling without XPU"
FORCE
)
if
(
NOT WITH_XPU AND WITH_XPU_BKCL
)
message
(
WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF."
)
set
(
WITH_XPU_BKCL
OFF
CACHE STRING
"Disable BKCL when compiling without XPU"
FORCE
)
endif
()
# CNCL is only usable together with MLU: when MLU is disabled but CNCL was
# requested, warn and force the dependent WITH_CNCL option off. This mirrors
# the NCCL/GPU, BKCL/XPU and RCCL/ROCM guards elsewhere in this file.
if(NOT WITH_MLU AND WITH_CNCL)
  message(
    WARNING "Disable CNCL when compiling without MLU. Force WITH_CNCL=OFF.")
  # WITH_MLU is already off in this branch, so the option to switch off is
  # WITH_CNCL.
  set(WITH_CNCL
      OFF
      CACHE STRING "Disable CNCL when compiling without MLU" FORCE)
endif()
if
(
WITH_NCCL
)
add_definitions
(
"-DPADDLE_WITH_NCCL"
)
include
(
nccl
)
add_definitions
(
"-DPADDLE_WITH_NCCL"
)
include
(
nccl
)
else
()
if
(
WITH_GPU
)
MESSAGE
(
WARNING
"If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used."
)
endif
()
if
(
WITH_GPU
)
message
(
WARNING
"If the environment is multi-card, the WITH_NCCL option needs to be turned on, otherwise only a single card can be used."
)
endif
()
endif
()
if
(
WITH_BRPC_RDMA
)
message
(
STATUS
"Use brpc with rdma."
)
if
(
NOT WITH_DISTRIBUTE
)
message
(
FATAL_ERROR
"Can't use brpc rdma in no distribute env."
)
endif
()
message
(
STATUS
"Use brpc with rdma."
)
if
(
NOT WITH_DISTRIBUTE
)
message
(
FATAL_ERROR
"Can't use brpc rdma in no distribute env."
)
endif
()
endif
()
if
(
WITH_GPU
)
include
(
cuda
)
# lite subgraph compilation depends on CUDNN_ROOT,
# so include(cudnn) needs to be in front of include(third_party/lite)
include
(
cudnn
)
# set cudnn libraries, must before configure
include
(
tensorrt
)
# there is no official support of nccl, cupti in windows
if
(
NOT WIN32
)
include
(
cupti
)
endif
()
include
(
cuda
)
# lite subgraph compilation depends on CUDNN_ROOT,
# so include(cudnn) needs to be in front of include(third_party/lite)
include
(
cudnn
)
# set cudnn libraries, must before configure
include
(
tensorrt
)
# there is no official support of nccl, cupti in windows
if
(
NOT WIN32
)
include
(
cupti
)
endif
()
endif
()
if
(
WITH_MLU
)
include
(
neuware
)
include
(
neuware
)
endif
()
if
(
WITH_ROCM
)
include
(
hip
)
include
(
miopen
)
# set miopen libraries, must before configure
include
(
hip
)
include
(
miopen
)
# set miopen libraries, must before configure
endif
(
WITH_ROCM
)
if
(
WITH_XPU_KP
)
include
(
xpu_kp
)
include
(
xpu_kp
)
endif
()
if
(
NOT WITH_ROCM AND WITH_RCCL
)
MESSAGE
(
WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF."
)
set
(
WITH_RCCL OFF CACHE STRING
"Disable RCCL when compiling without ROCM"
FORCE
)
if
(
NOT WITH_ROCM AND WITH_RCCL
)
message
(
WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF."
)
set
(
WITH_RCCL
OFF
CACHE STRING
"Disable RCCL when compiling without ROCM"
FORCE
)
endif
()
if
(
WITH_RCCL
)
add_definitions
(
"-DPADDLE_WITH_RCCL"
)
include
(
rccl
)
add_definitions
(
"-DPADDLE_WITH_RCCL"
)
include
(
rccl
)
else
()
if
(
WITH_ROCM
)
MESSAGE
(
WARNING
"If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used."
)
endif
()
if
(
WITH_ROCM
)
message
(
WARNING
"If the environment is multi-card, the WITH_RCCL option needs to be turned on, otherwise only a single card can be used."
)
endif
()
endif
()
if
(
WITH_HETERPS AND WITH_PSLIB
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-D_GLIBCXX_USE_CXX11_ABI=0"
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-D_GLIBCXX_USE_CXX11_ABI=0"
)
endif
()
if
(
WITH_DISTRIBUTE
)
if
(
LINUX
)
set
(
WITH_GLOO ON CACHE STRING
"Enable GLOO when compiling WITH_DISTRIBUTE=ON."
FORCE
)
endif
()
if
(
WITH_ASCEND_CL
)
# disable WITH_PSCORE for NPU before include third_party
MESSAGE
(
WARNING
"Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF."
)
set
(
WITH_PSCORE OFF CACHE BOOL
"Disable WITH_PSCORE when compiling with NPU"
FORCE
)
endif
()
endif
()
include
(
third_party
)
# download, build, install third_party, Contains about 20+ dependencies
include
(
flags
)
# set paddle compile flags
if
(
LINUX
)
set
(
WITH_GLOO
ON
CACHE STRING
"Enable GLOO when compiling WITH_DISTRIBUTE=ON."
FORCE
)
endif
()
if
(
WITH_ASCEND_CL
)
# disable WITH_PSCORE for NPU before include third_party
message
(
WARNING
"Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF."
)
set
(
WITH_PSCORE
OFF
CACHE BOOL
"Disable WITH_PSCORE when compiling with NPU"
FORCE
)
endif
()
endif
()
include
(
third_party
)
# download, build, install third_party, Contains about 20+ dependencies
include
(
flags
)
# set paddle compile flags
if
(
WITH_PROFILER
)
find_package
(
Gperftools REQUIRED
)
include_directories
(
${
GPERFTOOLS_INCLUDE_DIR
}
)
add_definitions
(
-DWITH_GPERFTOOLS
)
find_package
(
Gperftools REQUIRED
)
include_directories
(
${
GPERFTOOLS_INCLUDE_DIR
}
)
add_definitions
(
-DWITH_GPERFTOOLS
)
endif
()
include
(
util
)
# set unittest and link libs
include
(
version
)
# set PADDLE_VERSION
include
(
coveralls
)
# set code coverage
include
(
configure
)
# add paddle env configuration
include
(
util
)
# set unittest and link libs
include
(
version
)
# set PADDLE_VERSION
include
(
coveralls
)
# set code coverage
include
(
configure
)
# add paddle env configuration
include_directories
(
"
${
PADDLE_SOURCE_DIR
}
"
)
if
(
WITH_NV_JETSON
)
set
(
WITH_ARM ON CACHE STRING
"Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON."
FORCE
)
set
(
WITH_ARM
ON
CACHE STRING
"Set WITH_ARM=ON when compiling WITH_NV_JETSON=ON."
FORCE
)
endif
()
# ARM build: compile position-independent code, switch off the features this
# block disables for ARM (XBYAK, MKL, AVX) and define PADDLE_WITH_ARM.
if(WITH_ARM)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
  set(WITH_XBYAK
      OFF
      CACHE STRING "Disable XBYAK when compiling WITH_ARM=ON." FORCE)
  set(WITH_MKL
      OFF
      CACHE STRING "Disable MKL when compiling WITH_ARM=ON." FORCE)
  # The cache description names WITH_ARM=ON as the trigger, matching the
  # XBYAK and MKL entries above.
  set(WITH_AVX
      OFF
      CACHE STRING "Disable AVX when compiling WITH_ARM=ON." FORCE)
  add_definitions(-DPADDLE_WITH_ARM)
endif()

# SW build: position-independent code plus -mieee, with XBYAK and MKL off.
if(WITH_SW)
  # mieee flag solves floating-point exceptions under sw and ALPHA architectures
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -mieee")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -mieee")
  set(WITH_XBYAK
      OFF
      CACHE STRING "Disable XBYAK when compiling WITH_SW=ON" FORCE)
  set(WITH_MKL
      OFF
      CACHE STRING "Disable MKL when compiling WITH_SW=ON." FORCE)
  add_definitions(-DPADDLE_WITH_SW)
endif()

# MIPS build: XBYAK is switched off and PADDLE_WITH_MIPS is defined.
if(WITH_MIPS)
  set(WITH_XBYAK
      OFF
      CACHE STRING "Disable XBYAK when compiling WITH_MIPS=ON" FORCE)
  add_definitions(-DPADDLE_WITH_MIPS)
endif()

# oneMKL build: expose PADDLE_WITH_ONEMKL to the sources.
if(WITH_ONEMKL)
  add_definitions(-DPADDLE_WITH_ONEMKL)
endif()

# heterps build: add -faligned-new when the C++ compiler version is above 7.0.
if(WITH_HETERPS)
  if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -faligned-new")
  endif()
endif()
set
(
PADDLE_PYTHON_BUILD_DIR
"
${
CMAKE_CURRENT_BINARY_DIR
}
/python/build"
)
...
...
@@ -450,25 +537,32 @@ set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
# Add the PADDLE_DLL_EXPORT compile definition unconditionally.
add_definitions(-DPADDLE_DLL_EXPORT)
if
(
ON_INFER
)
# you can turn off the paddle fluid and inference lib by setting ON_INFER=OFF
message
(
STATUS
"On inference mode, will take place some specific optimization."
)
include
(
inference_lib
)
add_definitions
(
-DPADDLE_ON_INFERENCE
)
# you can turn off the paddle fluid and inference lib by setting ON_INFER=OFF
message
(
STATUS
"On inference mode, will take place some specific optimization."
)
include
(
inference_lib
)
add_definitions
(
-DPADDLE_ON_INFERENCE
)
else
()
#TODO(luotao), combine this warning with `make inference_lib_dist` command.
message
(
WARNING
"On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only."
)
#TODO(luotao), combine this warning with `make inference_lib_dist` command.
message
(
WARNING
"On inference mode, will take place some specific optimization. Turn on the ON_INFER flag when building inference_lib only."
)
endif
()
if
(
WITH_STRIP
)
find_program
(
STRIP_PATH strip
)
if
(
NOT STRIP_PATH OR NOT LINUX
)
set
(
WITH_STRIP OFF CACHE STRING
"Command strip is only used on Linux when it exists."
FORCE
)
endif
()
find_program
(
STRIP_PATH strip
)
if
(
NOT STRIP_PATH OR NOT LINUX
)
set
(
WITH_STRIP
OFF
CACHE STRING
"Command strip is only used on Linux when it exists."
FORCE
)
endif
()
endif
()
# Descend into the paddle/ source directory.
add_subdirectory(paddle)
if
(
WITH_PYTHON
)
add_subdirectory
(
python
)
add_subdirectory
(
python
)
endif
()
# Read this directory's INCLUDE_DIRECTORIES property into all_inc_dirs.
get_directory_property(all_inc_dirs INCLUDE_DIRECTORIES)
...
...
cmake/experimental.cmake
0 → 100644
浏览文件 @
210fa777
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options
# Pull in the experimental lazy CUDA module loading option; the module is
# looked up by name, so it must be reachable through CMAKE_MODULE_PATH.
include(experiments/cuda_module_loading_lazy)
cmake/experiments/cuda_module_loading_lazy.cmake
0 → 100644
浏览文件 @
210fa777
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options for lazy cuda module loading
# cuda module lazy loading is supported by CUDA 11.7+
# this experimental option makes Paddle support lazy loading with CUDA versions before 11.7.
# Experimental emulation of CUDA lazy module loading for CUDA older than 11.7.
#
# On Linux, when ON_INFER is enabled, CUDA was found and its version is below
# 11.7, this block
#   * switches to the shared CUDA runtime (CUDA_USE_STATIC_CUDA_RUNTIME=OFF and
#     "--cudart shared"),
#   * generates the wrapper tools/nvcc_lazy by running tools/nvcc_lazy.sh, and
#   * points CUDA_NVCC_EXECUTABLE and CMAKE_CUDA_COMPILER at that wrapper.
# In every other case it prints a message and returns from this file without
# changing anything.
if(LINUX)
  # Variables are referenced by name (or quoted) so that an empty or undefined
  # value evaluates to false instead of expanding to a malformed if() call.
  # NOTE(review): "NOT LINUX" can never be true inside if(LINUX); it is kept so
  # the check still matches the message below.
  if(NOT ON_INFER OR NOT LINUX)
    message(
      "EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms"
    )
    return()
  endif()
  if(NOT CUDA_FOUND)
    message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
    return()
  endif()
  if("${CUDA_VERSION}" VERSION_GREATER_EQUAL "11.7")
    message("cuda 11.7+ already support lazy module loading")
    return()
  endif()

  message(
    "for cuda before 11.7, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a"
  )
  set(CUDA_USE_STATIC_CUDA_RUNTIME
      OFF
      CACHE BOOL "" FORCE)
  set(CMAKE_CUDA_FLAGS "--cudart shared")
  enable_language(CUDA)

  set(paddle_nvcc_lazy_script "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh")
  set(paddle_nvcc_lazy_wrapper "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")

  # Several COMMANDs inside one execute_process() call run concurrently as a
  # pipeline, so every step gets its own call to guarantee the order. The stale
  # wrapper must be gone before the generator runs because nvcc_lazy.sh appends
  # (>>) to its output file.
  file(REMOVE_RECURSE "${paddle_nvcc_lazy_wrapper}")
  execute_process(COMMAND "chmod" "755" "${paddle_nvcc_lazy_script}")
  execute_process(
    COMMAND "bash" "${paddle_nvcc_lazy_script}" "${paddle_nvcc_lazy_wrapper}"
            "${CUDA_TOOLKIT_ROOT_DIR}"
    RESULT_VARIABLE paddle_nvcc_lazy_status)
  # Stop here on failure: the compiler variables below would otherwise point at
  # a missing or truncated wrapper.
  if(NOT paddle_nvcc_lazy_status EQUAL 0)
    message(
      FATAL_ERROR
        "failed to generate ${paddle_nvcc_lazy_wrapper} (status: ${paddle_nvcc_lazy_status})"
    )
  endif()
  execute_process(COMMAND "chmod" "755" "${paddle_nvcc_lazy_wrapper}")

  set(CUDA_NVCC_EXECUTABLE
      "${paddle_nvcc_lazy_wrapper}"
      CACHE FILEPATH "" FORCE)
  set(CMAKE_CUDA_COMPILER
      "${paddle_nvcc_lazy_wrapper}"
      CACHE FILEPATH "" FORCE)

  # This file is include()d, so drop the helper variables from the caller scope.
  unset(paddle_nvcc_lazy_script)
  unset(paddle_nvcc_lazy_wrapper)
  unset(paddle_nvcc_lazy_status)
endif()
tools/nvcc_lazy.sh
0 → 100644
浏览文件 @
210fa777
#!/usr/bin/env bash
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Generator for the nvcc wrapper script.
#   $1 - path of the wrapper file to write; every statement below appends one
#        line to it with >>.
#   $2 - path prefix that is baked into the wrapper as "$2/nvvm/bin" and
#        "$2/bin/nvcc" further down.
# NOTE(review): $1 is unquoted and only ever appended to, so the output path
# must not contain whitespace and the file must not exist before this runs.
#
# First lines of the wrapper: shebang, "unset GREP_OPTIONS" and "set -e".
echo
"#!/usr/bin/env bash"
>>
$1
echo
"unset GREP_OPTIONS"
>>
$1
echo
"set -e"
>>
$1
# "echo -e" without text appends an empty line to the wrapper.
echo
-e
>>
$1
# License header of the generated wrapper (emitted as comment lines).
echo
"# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved."
>>
$1
echo
"#"
>>
$1
echo
"# Licensed under the Apache License, Version 2.0 (the
\"
License
\"
);"
>>
$1
echo
"# you may not use this file except in compliance with the License."
>>
$1
echo
"# You may obtain a copy of the License at"
>>
$1
echo
"#"
>>
$1
echo
"# http://www.apache.org/licenses/LICENSE-2.0"
>>
$1
echo
"#"
>>
$1
echo
"# Unless required by applicable law or agreed to in writing, software"
>>
$1
echo
"# distributed under the License is distributed on an
\"
AS IS
\"
BASIS,"
>>
$1
echo
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied."
>>
$1
echo
"# See the License for the specific language governing permissions and"
>>
$1
echo
"# limitations under the License."
>>
$1
# Two empty lines separate the header from the wrapper body.
echo
-e
>>
$1
echo
-e
>>
$1
# Emit a comment line listing the CUDA_MODULE_LOADING values.
echo
"## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY"
>>
$1
echo
-e
>>
$1
# Emit the PATH export. $2 is expanded now, while this generator runs, so the
# literal directory "$2/nvvm/bin" is written into the wrapper; the escaped
# \$PATH stays a variable reference that is resolved when the wrapper runs.
echo
"# set cicc PATH for Centos"
>>
$1
echo
"export PATH=
\$
PATH:
$2
/nvvm/bin"
>>
$1
echo
-e
>>
$1
# Emit the version gate: the wrapper extracts the "V<major>.<minor>" number
# from `nvcc --version`, splits it at the dot, and for 11.7 or newer runs nvcc
# with the original arguments and exits.
# NOTE(review): this check calls plain `nvcc` (resolved through PATH), whereas
# the dry run emitted later calls "$2/bin/nvcc" - confirm both are meant to be
# the same compiler.
echo
"# check nvcc version, if nvcc >= 11.7, just run nvcc itself"
>>
$1
echo
"CUDA_VERSION=
\$
(nvcc --version | grep -oP '(?<=V)
\d
*
\.\d
*')"
>>
$1
echo
"CUDA_VERSION_MAJOR=
\$
{CUDA_VERSION%.*}"
>>
$1
echo
"CUDA_VERSION_MINOR=
\$
{CUDA_VERSION#*.}"
>>
$1
echo
"if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 7) )); then"
>>
$1
echo
" nvcc
\"\$
@
\"
"
>>
$1
echo
" exit"
>>
$1
echo
"fi"
>>
$1
echo
-e
>>
$1
# Emit the scratch-directory setup: the wrapper creates a temporary directory
# with mktemp under /tmp, stores its arguments in $BUILDDIR/args and names the
# build script $BUILDDIR/build.sh.
echo
"BUILDDIR=
\$
(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX)"
>>
$1
echo
"echo
\"\$
@
\"
>
\$
{BUILDDIR}/args"
>>
$1
echo
"BUILDSH=
\$
{BUILDDIR}/build.sh"
>>
$1
# Emit the dry run: "$2/bin/nvcc --dryrun --keep" is invoked with the original
# arguments and its combined stdout/stderr is filtered by sed (a "#$" marker is
# removed from each line, lines starting with "rm" are deleted) into $BUILDSH.
echo
"
$2
/bin/nvcc --dryrun --keep --keep-dir=
\$
{BUILDDIR}
\"\$
@
\"
2>&1 | sed -e 's/#
\\
$
//;/^rm/d' >
\$
BUILDSH"
>>
$1
# Emit the removal of lines that begin with optional whitespace and "--".
echo
"sed -i -e '/^
\s
*--/d'
\$
BUILDSH"
>>
$1
# Emit the split of build.sh at the first line matching
# "^cicc.*cudafe1.stub.c": everything up to and including that line goes to
# build.sh.pre, everything after it to build.sh.post.
echo
"sed -ne '1,/^cicc.*cudafe1.stub.c/p'
\$
{BUILDSH} >
\$
{BUILDSH}.pre"
>>
$1
echo
"sed -e '1,/^cicc.*cudafe1.stub.c/d'
\$
{BUILDSH} >
\$
{BUILDSH}.post"
>>
$1
echo
-e
>>
$1
# Emit the clean-up of the LIBRARIES= line in build.sh.pre: whitespace is
# stripped and each pair of adjacent double quotes is replaced by a space.
echo
"sed -i -e '/LIBRARIES=/{s/
\s
//g;s/
\"\"
/ /g}'
\$
{BUILDSH}.pre"
>>
$1
echo
-e
>>
$1
# Emit the execution of the first half of the build (build.sh.pre).
echo
"/usr/bin/env bash
\$
{BUILDSH}.pre"
>>
$1
# Emit the lookup of the generated *.cudafe1.stub.c file and of its base name.
# NOTE(review): the -name pattern is unquoted in the wrapper, so the shell may
# expand it before find sees it. CUFILE is only referenced by the commented-out
# debug variant below.
echo
"STUBF=
\$
(find
\$
BUILDDIR -name *.cudafe1.stub.c)"
>>
$1
echo
"CUFILE=
\$
(basename -s '.cudafe1.stub.c'
\$
STUBF)"
>>
$1
# Emit the stub patching, done with in-place sed edits on $STUBF:
#  1. append a forward declaration of __try____cudaRegisterAll(int) after the
#     line matching "__sti____cudaRegisterAll.*__attribute__";
echo
"sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);'
\$
STUBF"
>>
$1
#  2. rename the __sti____cudaRegisterAll definition to __do____cudaRegisterAll;
echo
"sed -i -e 's/__sti____cudaRegisterAll
\(
.*{
\)
/__do____cudaRegisterAll
\1
/'
\$
STUBF"
>>
$1
#  (the next emitted line starts with "#", so in the wrapper it is a disabled
#   variant of step 3 that additionally prints a message to stderr)
echo
"# sed -i -e
\"
/__do____cudaRegisterAll
\(
.*{
\)
/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(
\\\"
CUDA_MODULE_LOADING
\\\"
); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; fprintf(stderr,
\\\"
===>
\$
{CUFILE} lazy-load? %d
\\\\\\\\
n
\\\"
, l); __do____cudaRegisterAll();}
\"
\$
STUBF"
>>
$1
#  3. append the definition of __try____cudaRegisterAll(int l): it runs
#     __do____cudaRegisterAll() at most once, and returns without running it
#     when CUDA_MODULE_LOADING starts with "LAZY" and l is not 1;
echo
"sed -i -e
\"
/__do____cudaRegisterAll
\(
.*{
\)
/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(
\\\"
CUDA_MODULE_LOADING
\\\"
); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}
\"
\$
STUBF"
>>
$1
#  4. append a new __sti____cudaRegisterAll(void) that calls
#     __try____cudaRegisterAll(0);
echo
"sed -i -e '/__try____cudaRegisterAll
\(
.*{
\)
/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}'
\$
STUBF"
>>
$1
#  5. insert a __try____cudaRegisterAll(1) call where "{" is directly followed
#     by "__device_stub__".
echo
"sed -i -e 's/{
\(
__device_stub__
\)
/{__try____cudaRegisterAll(1);
\1
/'
\$
STUBF"
>>
$1
# Emit the execution of the second half of the build (build.sh.post) and the
# removal of the scratch directory.
echo
"/usr/bin/env bash
\$
{BUILDSH}.post"
>>
$1
echo
"rm -rf
\$
BUILDDIR"
>>
$1
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录