未验证 提交 3d9fe71e 编写于 作者: H Haipeng Wang 提交者: GitHub

experimental nvcc-lazy-module-loading (#43037)

* experimental nvcc-lazy-module-loading

* remove two empty last line from two files
上级 668e235c
......@@ -60,6 +60,7 @@ option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME"
# Note(zhouwei): It use option above, so put here
include(init)
include(generic) # simplify cmake module
include(experimental) # experimental build options
if (WITH_GPU AND WITH_XPU)
message(FATAL_ERROR "Error when compile GPU and XPU at the same time")
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options
include(experiments/cuda_module_loading_lazy)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# this file contains experimental build options for lazy cuda module loading
# cuda moduel lazy loading is supported by CUDA 11.6+
# this experiment option makes Paddle supports lazy loading before CUDA 11.6.
option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy cuda module loading" OFF)
if (${EXP_CUDA_MODULE_LOADING_LAZY})
if (NOT ${ON_INFER} OR NOT ${LINUX})
message("EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms")
return()
endif ()
if (NOT ${CUDA_FOUND})
message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
return()
endif ()
if (${CUDA_VERSION} VERSION_GREATER_EQUAL "11.6")
message("cuda 11.6+ already support lazy module loading")
return()
endif ()
message("for cuda before 11.6, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a")
set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "" FORCE)
set(CMAKE_CUDA_FLAGS "--cudart shared")
enable_language(CUDA)
set(CUDA_NVCC_EXECUTABLE "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" CACHE FILEPATH "" FORCE)
set(CMAKE_CUDA_COMPILER "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" CACHE FILEPATH "" FORCE)
endif()
#!/usr/bin/env bash
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY
# check nvcc version, if nvcc >= 11.6, just run nvcc itself
CUDA_VERSION=$(nvcc --version | grep -oP '(?<=cuda_)\d*\.\d*')
CUDA_VERSION_MAJOR=${CUDA_VERSION%.*}
CUDA_VERSION_MINOR=${CUDA_VERSION#*.}
if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 6) )); then
nvcc "$@"
exit
fi
BUILDDIR=$(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX)
echo "$@" > ${BUILDDIR}/args
BUILDSH=${BUILDDIR}/build.sh
/usr/local/cuda/bin/nvcc --dryrun --keep --keep-dir=${BUILDDIR} "$@" 2>&1 | sed -e 's/#\$ //;/^rm/d' > $BUILDSH
sed -i -e '/^\s*--/d' $BUILDSH
sed -ne '1,/^cicc.*cudafe1.stub.c/p' ${BUILDSH} > ${BUILDSH}.pre
sed -e '1,/^cicc.*cudafe1.stub.c/d' ${BUILDSH} > ${BUILDSH}.post
sed -i -e '/LIBRARIES=/{s/\s//g;s/""/ /g}' ${BUILDSH}.pre
/usr/bin/env bash ${BUILDSH}.pre
STUBF=$(find $BUILDDIR -name *.cudafe1.stub.c)
CUFILE=$(basename -s '.cudafe1.stub.c' $STUBF)
sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);' $STUBF
sed -i -e 's/__sti____cudaRegisterAll\(.*{\)/__do____cudaRegisterAll\1/' $STUBF
# sed -i -e "/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\"CUDA_MODULE_LOADING\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; fprintf(stderr,\"===> ${CUFILE} lazy-load? %d\\\\n\", l); __do____cudaRegisterAll();}" $STUBF
sed -i -e "/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\"CUDA_MODULE_LOADING\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}" $STUBF
sed -i -e '/__try____cudaRegisterAll\(.*{\)/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}' $STUBF
sed -i -e 's/{\(__device_stub__\)/{__try____cudaRegisterAll(1);\1/' $STUBF
/usr/bin/env bash ${BUILDSH}.post
rm -rf $BUILDDIR
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册