From 3d9fe71e3c043b715134f9991a85a6afb4cd6423 Mon Sep 17 00:00:00 2001 From: Haipeng Wang Date: Fri, 27 May 2022 14:52:51 +0800 Subject: [PATCH] experimental nvcc-lazy-module-loading (#43037) * experimental nvcc-lazy-module-loading * remove two empty last line from two files --- CMakeLists.txt | 1 + cmake/experimental.cmake | 17 +++++++ .../cuda_module_loading_lazy.cmake | 40 +++++++++++++++ tools/nvcc_lazy | 49 +++++++++++++++++++ 4 files changed, 107 insertions(+) create mode 100644 cmake/experimental.cmake create mode 100644 cmake/experiments/cuda_module_loading_lazy.cmake create mode 100755 tools/nvcc_lazy diff --git a/CMakeLists.txt b/CMakeLists.txt index 433081ee22..f3ed08d56e 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,7 @@ option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" # Note(zhouwei): It use option above, so put here include(init) include(generic) # simplify cmake module +include(experimental) # experimental build options if (WITH_GPU AND WITH_XPU) message(FATAL_ERROR "Error when compile GPU and XPU at the same time") diff --git a/cmake/experimental.cmake b/cmake/experimental.cmake new file mode 100644 index 0000000000..55e7fe263f --- /dev/null +++ b/cmake/experimental.cmake @@ -0,0 +1,17 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# this file contains experimental build options + +include(experiments/cuda_module_loading_lazy) diff --git a/cmake/experiments/cuda_module_loading_lazy.cmake b/cmake/experiments/cuda_module_loading_lazy.cmake new file mode 100644 index 0000000000..ef6a51b594 --- /dev/null +++ b/cmake/experiments/cuda_module_loading_lazy.cmake @@ -0,0 +1,40 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# this file contains experimental build options for lazy cuda module loading +# cuda module lazy loading is supported by CUDA 11.6+ +# this experimental option makes Paddle support lazy loading before CUDA 11.6.
+
+# Opt-in experiment. Unless every check below passes, a notice is printed and
+# this file returns without touching any CUDA setting.
+option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy cuda module loading" OFF)
+if (EXP_CUDA_MODULE_LOADING_LAZY)
+  # Variables are tested by name (or quoted) so that an unset ON_INFER, LINUX,
+  # CUDA_FOUND or CUDA_VERSION evaluates to false instead of expanding to
+  # nothing and producing a malformed if() expression.
+  if (NOT ON_INFER OR NOT LINUX)
+    message("EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms")
+    return()
+  endif ()
+  if (NOT CUDA_FOUND)
+    message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
+    return()
+  endif ()
+  if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "11.6")
+    message("cuda 11.6+ already support lazy module loading")
+    return()
+  endif ()
+
+  message("for cuda before 11.6, libcudart.so must be used for the lazy module loading trick to work, instead of libcudart_static.a")
+  set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "" FORCE)
+  # Append rather than overwrite, so CUDA flags that are already set are kept.
+  string(APPEND CMAKE_CUDA_FLAGS " --cudart shared")
+  # The CUDA language is enabled first; only afterwards are the compiler
+  # variables pointed at the tools/nvcc_lazy wrapper script.
+  enable_language(CUDA)
+  set(CUDA_NVCC_EXECUTABLE "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" CACHE FILEPATH "" FORCE)
+  set(CMAKE_CUDA_COMPILER "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy" CACHE FILEPATH "" FORCE)
+endif()
diff --git a/tools/nvcc_lazy b/tools/nvcc_lazy
new file mode 100755
index 0000000000..9cb49b04ff
--- /dev/null
+++ b/tools/nvcc_lazy
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY
+
+# nvcc wrapper. For nvcc older than 11.6 it replays the build steps printed by
+# "nvcc --dryrun" and, between the cicc step that names the *.cudafe1.stub.c
+# file and the remaining steps, edits that stub file:
+#   - the body of __sti____cudaRegisterAll is renamed __do____cudaRegisterAll;
+#   - a new __try____cudaRegisterAll(int l) calls __do____cudaRegisterAll at
+#     most once, and skips the call when the CUDA_MODULE_LOADING environment
+#     variable starts with "LAZY" and l != 1;
+#   - a new __sti____cudaRegisterAll calls __try____cudaRegisterAll(0);
+#   - every "{__device_stub__" gets a __try____cudaRegisterAll(1) call
+#     inserted in front of it.
+# The exit status is that of the replayed build steps.
+
+# check nvcc version, if nvcc >= 11.6, just run nvcc itself
+CUDA_VERSION=$(nvcc --version | grep -oP '(?<=cuda_)\d*\.\d*')
+CUDA_VERSION_MAJOR=${CUDA_VERSION%.*}
+CUDA_VERSION_MINOR=${CUDA_VERSION#*.}
+if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 6) )); then
+  nvcc "$@"
+  exit
+fi
+
+BUILDDIR=$(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX) || exit 1
+# Remove the scratch directory on every exit path. The trap does not call
+# exit itself, so the script's exit status is left as it was.
+trap 'rm -rf "$BUILDDIR"' EXIT
+echo "$@" > "${BUILDDIR}/args"
+BUILDSH=${BUILDDIR}/build.sh
+# NOTE(review): the version check above runs whichever nvcc is first in PATH,
+# while the dry run below always uses /usr/local/cuda/bin/nvcc - confirm that
+# both are meant to be the same toolkit.
+# Turn the dry-run listing into a shell script: strip the "#$ " prefix, drop
+# the rm steps and the lines that start with "--".
+/usr/local/cuda/bin/nvcc --dryrun --keep --keep-dir="${BUILDDIR}" "$@" 2>&1 | sed -e 's/#\$ //;/^rm/d' > "$BUILDSH"
+sed -i -e '/^\s*--/d' "$BUILDSH"
+# Split at the first cicc line that mentions cudafe1.stub.c: .pre ends with
+# that line, .post holds everything after it.
+sed -ne '1,/^cicc.*cudafe1.stub.c/p' "$BUILDSH" > "${BUILDSH}.pre"
+sed -e '1,/^cicc.*cudafe1.stub.c/d' "$BUILDSH" > "${BUILDSH}.post"
+
+sed -i -e '/LIBRARIES=/{s/\s//g;s/""/ /g}' "${BUILDSH}.pre"
+
+# A failure in the first half aborts the build with that status.
+/usr/bin/env bash "${BUILDSH}.pre" || exit $?
+
+# The pattern is quoted so the shell cannot expand it against the current
+# directory before find sees it.
+STUBF=$(find "$BUILDDIR" -name '*.cudafe1.stub.c')
+if [ -n "$STUBF" ]; then
+  # $STUBF is left unquoted on purpose: find may list more than one stub file.
+  sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);' $STUBF
+  sed -i -e 's/__sti____cudaRegisterAll\(.*{\)/__do____cudaRegisterAll\1/' $STUBF
+  sed -i -e "/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\"CUDA_MODULE_LOADING\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}" $STUBF
+  sed -i -e '/__try____cudaRegisterAll\(.*{\)/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}' $STUBF
+  sed -i -e 's/{\(__device_stub__\)/{__try____cudaRegisterAll(1);\1/' $STUBF
+fi
+
+# Report the result of the remaining build steps to the caller; cleanup is
+# done by the EXIT trap.
+/usr/bin/env bash "${BUILDSH}.post"
+exit $?