未验证 提交 2f3b393d 编写于 作者: Z Zhanlue Yang 提交者: GitHub

New whl release strategy with pruned nv_fatbin (#35239)

[Background]
Growth in code size is hard to reverse in the long run and leads to huge release packages, which
not only hamper the user experience but also exceed the hard size limit of PyPI.

In these packages, the NV_FATBIN section takes up 86% of the compiled dylib size, owing to the vast
number of GPU architectures supported.

This PR aims to prune this NV_FATBIN.

[Solution]
In the new release strategy, two types of whl packages will be involved:

Cubin PIP package:
The Cubin PIP package maintains a smaller window of supported GPU architectures, containing
sm_60, sm_70, sm_75 and sm_80 cubins, which cover the Pascal through Ampere architectures.

JIT release package:
This is a backup for the Cubin PIP package, containing compute_35, compute_50, compute_60,
compute_70, compute_75 and compute_80, with the best performance and GPU architecture coverage.

However, it takes around 10 min to install due to the JIT compilation.

[How to use]
The new release strategy is disabled by default.
To compile for the Cubin PIP package, add this to cmake: -DNEW_RELEASE_CUBIN=ON
To compile for the JIT release package, add this to cmake: -DNEW_RELEASE_JIT=ON
上级 d9f59fd1
...@@ -222,6 +222,8 @@ option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF) ...@@ -222,6 +222,8 @@ option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
option(WITH_MUSL "Compile with musl libc instead of gblic" OFF) option(WITH_MUSL "Compile with musl libc instead of gblic" OFF)
option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF) option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
option(WITH_STRIP "Strip so files of Whl packages" OFF) option(WITH_STRIP "Strip so files of Whl packages" OFF)
# Build the "cubin" wheel intended for PyPI. When ON, the GPU arch setup
# defines -DNEW_RELEASE_CUBIN and uses its own lists of known architectures.
option(NEW_RELEASE_CUBIN "PaddlePaddle next-level release strategy for pypi cubin package" OFF)
# Build the backup "JIT" wheel. When ON, the GPU arch setup defines
# -DNEW_RELEASE_JIT and select_nvcc_arch_flags moves the requested binary
# architectures over to the PTX list.
option(NEW_RELEASE_JIT "PaddlePaddle next-level release strategy for backup jit package" OFF)
# PY_VERSION # PY_VERSION
if(NOT PY_VERSION) if(NOT PY_VERSION)
......
...@@ -3,10 +3,22 @@ if(NOT WITH_GPU) ...@@ -3,10 +3,22 @@ if(NOT WITH_GPU)
endif() endif()
if (WITH_NV_JETSON) if(WITH_NV_JETSON)
add_definitions(-DWITH_NV_JETSON) add_definitions(-DWITH_NV_JETSON)
set(paddle_known_gpu_archs "53 62 72") set(paddle_known_gpu_archs "53 62 72")
set(paddle_known_gpu_archs10 "53 62 72") set(paddle_known_gpu_archs10 "53 62 72")
elseif(NEW_RELEASE_CUBIN)
  # Cubin release package: expose the macro to the sources and select the
  # architecture lists dedicated to this release flavour.
  message("Using New Release Strategy - Cubin Package")
  add_definitions(-DNEW_RELEASE_CUBIN)
  set(paddle_known_gpu_archs "35 37 50 52 60 61 70 75 80 86")
  # NOTE(review): the 10 / 11 suffixes presumably refer to the CUDA major
  # version the list applies to -- confirm where these variables are consumed.
  set(paddle_known_gpu_archs10 "50 60 70 75")
  set(paddle_known_gpu_archs11 "60 70 75 80")
elseif(NEW_RELEASE_JIT)
  # JIT release package: same overall arch list as the cubin flavour, but the
  # per-version lists additionally keep 35.
  message("Using New Release Strategy - JIT Package")
  add_definitions(-DNEW_RELEASE_JIT)
  set(paddle_known_gpu_archs "35 37 50 52 60 61 70 75 80 86")
  set(paddle_known_gpu_archs10 "35 50 60 70 75")
  set(paddle_known_gpu_archs11 "35 50 60 70 75 80")
else() else()
set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80") set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80")
set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75") set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75")
...@@ -130,11 +142,17 @@ function(select_nvcc_arch_flags out_variable) ...@@ -130,11 +142,17 @@ function(select_nvcc_arch_flags out_variable)
set(cuda_arch_bin ${CUDA_ARCH_BIN}) set(cuda_arch_bin ${CUDA_ARCH_BIN})
endif() endif()
# JIT release: move every requested binary architecture to the PTX list and
# clear the binary list.
# NOTE(review): a later statement in this function rebuilds cuda_arch_ptx from
# "${CUDA_ARCH_PTX}" (upper case), which would discard the value merged here --
# confirm whether that statement should read "${cuda_arch_ptx}" instead.
if(NEW_RELEASE_JIT)
  # Keep a separator between the two lists so the last PTX entry and the first
  # binary entry cannot fuse into one number (e.g. "35" + "50 60" -> "3550 60").
  set(cuda_arch_ptx "${cuda_arch_ptx} ${cuda_arch_bin}")
  set(cuda_arch_bin "")
endif()
# remove dots and convert to lists # remove dots and convert to lists
string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}")
string(REGEX REPLACE "\\." "" cuda_arch_ptx "${CUDA_ARCH_PTX}") string(REGEX REPLACE "\\." "" cuda_arch_ptx "${CUDA_ARCH_PTX}")
string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}")
string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}")
list(REMOVE_DUPLICATES cuda_arch_bin) list(REMOVE_DUPLICATES cuda_arch_bin)
list(REMOVE_DUPLICATES cuda_arch_ptx) list(REMOVE_DUPLICATES cuda_arch_ptx)
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
try: try:
from paddle.version import full_version as __version__ from paddle.version import full_version as __version__
from paddle.version import commit as __git_commit__ from paddle.version import commit as __git_commit__
from paddle.cuda_env import *
except ImportError: except ImportError:
import sys import sys
sys.stderr.write('''Warning with import paddle: you should not sys.stderr.write('''Warning with import paddle: you should not
......
...@@ -106,6 +106,20 @@ def mkl(): ...@@ -106,6 +106,20 @@ def mkl():
write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py') write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py')
def write_cuda_env_config_py(filename='paddle/cuda_env.py'):
    """Generate the ``cuda_env.py`` module.

    The generated file is empty unless the JIT release flavour is enabled at
    configure time, in which case it sets the ``CUDA_CACHE_MAXSIZE``
    environment variable when the module is imported.

    Args:
        filename: Path of the file to (over)write.
    """
    cnt = ""
    # The placeholders below are substituted at configure time. Accept the
    # declared CMake option (NEW_RELEASE_JIT) as well as the older
    # JIT_RELEASE_WHL spelling, so that enabling either one takes effect.
    if 'ON' in ('${NEW_RELEASE_JIT}', '${JIT_RELEASE_WHL}'):
        # 805306368 bytes == 768 MiB.
        cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
#
import os
os.environ['CUDA_CACHE_MAXSIZE'] = '805306368'
'''
    with open(filename, 'w') as f:
        f.write(cnt)
write_cuda_env_config_py(filename='@PADDLE_BINARY_DIR@/python/paddle/cuda_env.py')
def write_distributed_training_mode_py(filename='paddle/fluid/incubate/fleet/parameter_server/version.py'): def write_distributed_training_mode_py(filename='paddle/fluid/incubate/fleet/parameter_server/version.py'):
cnt = '''from __future__ import print_function cnt = '''from __future__ import print_function
...@@ -414,11 +428,10 @@ class InstallCommand(InstallCommandBase): ...@@ -414,11 +428,10 @@ class InstallCommand(InstallCommandBase):
def finalize_options(self): def finalize_options(self):
ret = InstallCommandBase.finalize_options(self) ret = InstallCommandBase.finalize_options(self)
self.install_lib = self.install_platlib self.install_lib = self.install_platlib
self.install_headers = os.path.join(self.install_platlib, 'paddle', self.install_headers = os.path.join(self.install_platlib, 'paddle', 'include')
'include')
return ret return ret
class InstallHeaders(Command): class InstallHeaders(Command):
"""Override how headers are copied. """Override how headers are copied.
""" """
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册