diff --git a/.gitignore b/.gitignore
index 4b53cfd8591686efc8c92a169620e75620ddbac1..4ddcff8adc7d589edca16b507c760187464e431d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -53,6 +53,7 @@
 model_test
 Testing
 tools/__pycache__
+tools/nvcc_lazy
 
 # This file is automatically generated.
 # TODO(zhiqiang) Move this file to build directory.
diff --git a/cmake/experiments/cuda_module_loading_lazy.cmake b/cmake/experiments/cuda_module_loading_lazy.cmake
index bcbfaacad1240fac42f73d248bfed1c0bf814289..d1e07f57cb045e1164cf4bf8d8a72ad3454bc4af 100644
--- a/cmake/experiments/cuda_module_loading_lazy.cmake
+++ b/cmake/experiments/cuda_module_loading_lazy.cmake
@@ -1,4 +1,4 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,16 +16,15 @@
 # cuda moduel lazy loading is supported by CUDA 11.7+
 # this experiment option makes Paddle supports lazy loading before CUDA 11.7.
 
-option(EXP_CUDA_MODULE_LOADING_LAZY "enable lazy cuda module loading" OFF)
-if(${EXP_CUDA_MODULE_LOADING_LAZY})
-  if(NOT ${ON_INFER} OR NOT ${LINUX})
+if(LINUX)
+  if(NOT ON_INFER)
     message(
       "EXP_CUDA_MODULE_LOADING_LAZY only works with ON_INFER=ON on Linux platforms"
     )
     return()
   endif()
-  if(NOT ${CUDA_FOUND})
-    message("EXP_CUDA_MODULE_LOADING_LAZY only works with CUDA")
+  if(NOT WITH_GPU)
+    message("EXP_CUDA_MODULE_LOADING_LAZY only works with GPU")
     return()
   endif()
   if(${CUDA_VERSION} VERSION_GREATER_EQUAL "11.7")
@@ -41,6 +40,24 @@ if(${EXP_CUDA_MODULE_LOADING_LAZY})
       CACHE BOOL "" FORCE)
   set(CMAKE_CUDA_FLAGS "--cudart shared")
   enable_language(CUDA)
+  # Rebuild the nvcc wrapper in separate, ordered steps: the COMMANDs of one
+  # execute_process() call run concurrently as a pipeline, so removing the old
+  # wrapper there could race with the generator writing the new one.
+  set(NVCC_LAZY_WRAPPER "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy")
+  file(REMOVE_RECURSE "${NVCC_LAZY_WRAPPER}")
+  # The generator is started through bash, so it needs no executable bit.
+  execute_process(
+    COMMAND "bash" "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy.sh"
+            "${NVCC_LAZY_WRAPPER}" "${CUDA_TOOLKIT_ROOT_DIR}"
+    RESULT_VARIABLE NVCC_LAZY_GEN_RESULT)
+  if(NOT NVCC_LAZY_GEN_RESULT EQUAL 0)
+    message(FATAL_ERROR "tools/nvcc_lazy.sh failed: ${NVCC_LAZY_GEN_RESULT}")
+  endif()
+  execute_process(COMMAND "chmod" "755" "${NVCC_LAZY_WRAPPER}"
+                  RESULT_VARIABLE NVCC_LAZY_CHMOD_RESULT)
+  if(NOT NVCC_LAZY_CHMOD_RESULT EQUAL 0)
+    message(FATAL_ERROR "chmod 755 ${NVCC_LAZY_WRAPPER} failed")
+  endif()
   set(CUDA_NVCC_EXECUTABLE
       "${CMAKE_SOURCE_DIR}/tools/nvcc_lazy"
       CACHE FILEPATH "" FORCE)
diff --git a/tools/nvcc_lazy b/tools/nvcc_lazy
deleted file mode 100755
index a553c6f5dcdfd02ea2fb0a8827ade4dac920da30..0000000000000000000000000000000000000000
--- a/tools/nvcc_lazy
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env bash
-unset GREP_OPTIONS
-set -e
-
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY
-
-# set cicc PATH for Centos
-export PATH=$PATH:/usr/local/cuda/nvvm/bin
-
-# check nvcc version, if nvcc >= 11.7, just run nvcc itself
-CUDA_VERSION=$(nvcc --version | grep -oP '(?<=V)\d*\.\d*')
-CUDA_VERSION_MAJOR=${CUDA_VERSION%.*}
-CUDA_VERSION_MINOR=${CUDA_VERSION#*.}
-if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 7) )); then
-  nvcc "$@"
-  exit
-fi
-
-BUILDDIR=$(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX)
-echo "$@" > ${BUILDDIR}/args
-BUILDSH=${BUILDDIR}/build.sh
-/usr/local/cuda/bin/nvcc --dryrun --keep --keep-dir=${BUILDDIR} "$@" 2>&1 | sed -e 's/#\$ //;/^rm/d' > $BUILDSH
-sed -i -e '/^\s*--/d' $BUILDSH
-sed -ne '1,/^cicc.*cudafe1.stub.c/p' ${BUILDSH} > ${BUILDSH}.pre
-sed -e '1,/^cicc.*cudafe1.stub.c/d' ${BUILDSH} > ${BUILDSH}.post
-
-sed -i -e '/LIBRARIES=/{s/\s//g;s/""/ /g}' ${BUILDSH}.pre
-
-/usr/bin/env bash ${BUILDSH}.pre
-STUBF=$(find $BUILDDIR -name *.cudafe1.stub.c)
-CUFILE=$(basename -s '.cudafe1.stub.c' $STUBF)
-sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);' $STUBF
-sed -i -e 's/__sti____cudaRegisterAll\(.*{\)/__do____cudaRegisterAll\1/' $STUBF
-# sed -i -e "/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\"CUDA_MODULE_LOADING\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; fprintf(stderr,\"===> ${CUFILE} lazy-load? %d\\\\n\", l); __do____cudaRegisterAll();}" $STUBF
-sed -i -e "/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\"CUDA_MODULE_LOADING\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}" $STUBF
-sed -i -e '/__try____cudaRegisterAll\(.*{\)/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}' $STUBF
-sed -i -e 's/{\(__device_stub__\)/{__try____cudaRegisterAll(1);\1/' $STUBF
-/usr/bin/env bash ${BUILDSH}.post
-rm -rf $BUILDDIR
diff --git a/tools/nvcc_lazy.sh b/tools/nvcc_lazy.sh
new file mode 100644
index 0000000000000000000000000000000000000000..011ac564cf91ba41b4c851ce7c52187658c359b8
--- /dev/null
+++ b/tools/nvcc_lazy.sh
@@ -0,0 +1,98 @@
+#!/usr/bin/env bash
+
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Writes the nvcc wrapper script used for lazy CUDA module loading.
+#
+# Usage: nvcc_lazy.sh <output-path> <cuda-toolkit-root>
+#   <output-path>        file that receives the generated wrapper
+#   <cuda-toolkit-root>  CUDA toolkit directory; the wrapper appends its bin/
+#                        and nvvm/bin/ to PATH and runs its bin/nvcc --dryrun
+#
+# The output file is truncated before writing, so running the generator again
+# replaces the wrapper instead of appending a second copy to it.
+
+if [[ $# -lt 2 || -z "$1" ]]; then
+  echo "usage: $0 <output-path> <cuda-toolkit-root>" >&2
+  exit 1
+fi
+
+out="$1"
+cuda_root="$2"
+
+# "\$" is escaped so it is expanded when the wrapper runs; ${cuda_root} is
+# expanded now and baked into the wrapper.
+{
+  echo "#!/usr/bin/env bash"
+  echo "unset GREP_OPTIONS"
+  echo "set -e"
+  echo
+  echo "if [[ \$# -le 8 ]]; then"
+  echo "  nvcc \"\$@\""
+  echo "  exit 0"
+  echo "fi"
+  echo
+  echo "# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved."
+  echo "#"
+  echo "# Licensed under the Apache License, Version 2.0 (the \"License\");"
+  echo "# you may not use this file except in compliance with the License."
+  echo "# You may obtain a copy of the License at"
+  echo "#"
+  echo "#     http://www.apache.org/licenses/LICENSE-2.0"
+  echo "#"
+  echo "# Unless required by applicable law or agreed to in writing, software"
+  echo "# distributed under the License is distributed on an \"AS IS\" BASIS,"
+  echo "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied."
+  echo "# See the License for the specific language governing permissions and"
+  echo "# limitations under the License."
+  echo
+  echo
+  echo "## CUDA_MODULE_LOADING=EAGER,DEFAULT,LAZY"
+  echo
+  echo "# set cicc PATH for Centos"
+  echo "export PATH=\$PATH:${cuda_root}/bin"
+  echo "export PATH=\$PATH:${cuda_root}/nvvm/bin"
+  echo
+  echo "# check nvcc version, if nvcc >= 11.7, just run nvcc itself"
+  echo "CUDA_VERSION=\$(nvcc --version | grep -oP '(?<=V)\d*\.\d*')"
+  echo "CUDA_VERSION_MAJOR=\${CUDA_VERSION%.*}"
+  echo "CUDA_VERSION_MINOR=\${CUDA_VERSION#*.}"
+  echo "if (( CUDA_VERSION_MAJOR > 11 || (CUDA_VERSION_MAJOR == 11 && CUDA_VERSION_MINOR >= 7) )); then"
+  echo "  nvcc \"\$@\""
+  echo "  exit"
+  echo "fi"
+  echo
+  echo "BUILDDIR=\$(mktemp -d /tmp/nvcc-lazy-build.XXXXXXXX)"
+  echo "echo \"\$@\" > \${BUILDDIR}/args"
+  echo "BUILDSH=\${BUILDDIR}/build.sh"
+  echo "${cuda_root}/bin/nvcc --dryrun --keep --keep-dir=\${BUILDDIR} \"\$@\" 2>&1 | sed -e 's/#\\$ //;/^rm/d' > \$BUILDSH"
+  echo "sed -i -e '/^\s*--/d' \$BUILDSH"
+  echo "sed -ne '1,/^cicc.*cudafe1.stub.c/p' \${BUILDSH} > \${BUILDSH}.pre"
+  echo "sed -e '1,/^cicc.*cudafe1.stub.c/d' \${BUILDSH} > \${BUILDSH}.post"
+  echo
+  echo "sed -i -e '/LIBRARIES=/{s/\s//g;s/\"\"/ /g}' \${BUILDSH}.pre"
+  echo
+  echo "/usr/bin/env bash \${BUILDSH}.pre"
+  echo "STUBF=\$(find \$BUILDDIR -name *.cudafe1.stub.c)"
+  echo "CUFILE=\$(basename -s '.cudafe1.stub.c' \$STUBF)"
+  echo "sed -i -e '/__sti____cudaRegisterAll.*__attribute__/a static void __try____cudaRegisterAll(int);' \$STUBF"
+  echo "sed -i -e 's/__sti____cudaRegisterAll\(.*{\)/__do____cudaRegisterAll\1/' \$STUBF"
+  echo "# sed -i -e \"/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\\\"CUDA_MODULE_LOADING\\\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; fprintf(stderr,\\\"===> \${CUFILE} lazy-load? %d\\\\\\\\n\\\", l); __do____cudaRegisterAll();}\" \$STUBF"
+  echo "sed -i -e \"/__do____cudaRegisterAll\(.*{\)/a static void __try____cudaRegisterAll(int l){static int _ls = 0; if (_ls) return; const char* lm = getenv(\\\"CUDA_MODULE_LOADING\\\"); if (lm&&(lm[0]=='L')&&(lm[1]=='A')&&(lm[2]=='Z')&&(lm[3]=='Y')&&(l!=1)) return; _ls = 1; __do____cudaRegisterAll();}\" \$STUBF"
+  echo "sed -i -e '/__try____cudaRegisterAll\(.*{\)/a static void __sti____cudaRegisterAll(void){__try____cudaRegisterAll(0);}' \$STUBF"
+  echo "sed -i -e 's/{\(__device_stub__\)/{__try____cudaRegisterAll(1);\1/' \$STUBF"
+  echo "/usr/bin/env bash \${BUILDSH}.post"
+  echo "rm -rf \$BUILDDIR"
+} > "${out}"