From 2f3b393d151afd6e82cb88095cb2079a9be65f61 Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Tue, 31 Aug 2021 18:09:18 +0800 Subject: [PATCH] New whl release strategy with pruned nv_fatbin (#35239) [Background] Expansion in code size can be irreversible in the long run, leading to huge release packages which not only hamper user experience but also exceed a hard limit of pypi. In particular, the NV_FATBIN section takes up 86% of the compiled dylib size, owing to the vast number of GPU arches supported. This PR aims to prune this NV_FATBIN. [Solution] In the new release strategy, two types of whl packages will be involved: Cubin PIP package: PIP package maintains a smaller window for GPU arches support, containing sm_60, sm_70, sm_75, sm_80 cubins, covering Pascal - Ampere arches. JIT release package: This is a backup for the Cubin PIP package, containing compute_35, compute_50, compute_60, compute_70, compute_75, compute_80, with best performance and GPU arches coverage. However, it takes around 10 min to install due to the JIT compilation. [How to use] The new release strategy is disabled by default. 
To compile for Cubin PIP package, add this to cmake: -DCUBIN_RELEASE_PIP To compile for JIT release package, add this to cmake: -DJIT_RELEASE_WHL --- CMakeLists.txt | 2 ++ cmake/cuda.cmake | 20 +++++++++++++++++++- python/paddle/__init__.py | 2 +- python/setup.py.in | 19 ++++++++++++++++--- 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 83191254f1..0f25e7d9dc 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -222,6 +222,8 @@ option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF) option(WITH_MUSL "Compile with musl libc instead of gblic" OFF) option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF) option(WITH_STRIP "Strip so files of Whl packages" OFF) +option(NEW_RELEASE_CUBIN "PaddlePaddle next-level release strategy for pypi cubin package" OFF) +option(NEW_RELEASE_JIT "PaddlePaddle next-level release strategy for backup jit package" OFF) # PY_VERSION if(NOT PY_VERSION) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index e1a9324650..f9c896acd8 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -3,10 +3,22 @@ if(NOT WITH_GPU) endif() -if (WITH_NV_JETSON) +if(WITH_NV_JETSON) add_definitions(-DWITH_NV_JETSON) set(paddle_known_gpu_archs "53 62 72") set(paddle_known_gpu_archs10 "53 62 72") +elseif(NEW_RELEASE_CUBIN) + message("Using New Release Strategy - Cubin Packge") + add_definitions(-DNEW_RELEASE_CUBIN) + set(paddle_known_gpu_archs "35 37 50 52 60 61 70 75 80 86") + set(paddle_known_gpu_archs10 "50 60 70 75") + set(paddle_known_gpu_archs11 "60 70 75 80") +elseif(NEW_RELEASE_JIT) + message("Using New Release Strategy - JIT Packge") + add_definitions(-DNEW_RELEASE_JIT) + set(paddle_known_gpu_archs "35 37 50 52 60 61 70 75 80 86") + set(paddle_known_gpu_archs10 "35 50 60 70 75") + set(paddle_known_gpu_archs11 "35 50 60 70 75 80") else() set(paddle_known_gpu_archs "35 50 52 60 61 70 75 80") set(paddle_known_gpu_archs10 "35 50 52 60 61 70 75") @@ -130,11 +142,17 @@ 
function(select_nvcc_arch_flags out_variable) set(cuda_arch_bin ${CUDA_ARCH_BIN}) endif() + if(NEW_RELEASE_JIT) + set(cuda_arch_ptx "${cuda_arch_ptx}${cuda_arch_bin}") + set(cuda_arch_bin "") + endif() + # remove dots and convert to lists string(REGEX REPLACE "\\." "" cuda_arch_bin "${cuda_arch_bin}") string(REGEX REPLACE "\\." "" cuda_arch_ptx "${CUDA_ARCH_PTX}") string(REGEX MATCHALL "[0-9()]+" cuda_arch_bin "${cuda_arch_bin}") string(REGEX MATCHALL "[0-9]+" cuda_arch_ptx "${cuda_arch_ptx}") + list(REMOVE_DUPLICATES cuda_arch_bin) list(REMOVE_DUPLICATES cuda_arch_ptx) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 1c38d51979..ce338275b2 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -14,7 +14,7 @@ try: from paddle.version import full_version as __version__ from paddle.version import commit as __git_commit__ - + from paddle.cuda_env import * except ImportError: import sys sys.stderr.write('''Warning with import paddle: you should not diff --git a/python/setup.py.in b/python/setup.py.in index 4990544926..8f9f973d93 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -106,6 +106,20 @@ def mkl(): write_version_py(filename='@PADDLE_BINARY_DIR@/python/paddle/version.py') +def write_cuda_env_config_py(filename='paddle/cuda_env.py'): + cnt = "" + if '${JIT_RELEASE_WHL}' == 'ON': + cnt = '''# THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY +# +import os +os.environ['CUDA_CACHE_MAXSIZE'] = '805306368' +''' + + with open(filename, 'w') as f: + f.write(cnt) + +write_cuda_env_config_py(filename='@PADDLE_BINARY_DIR@/python/paddle/cuda_env.py') + def write_distributed_training_mode_py(filename='paddle/fluid/incubate/fleet/parameter_server/version.py'): cnt = '''from __future__ import print_function @@ -414,11 +428,10 @@ class InstallCommand(InstallCommandBase): def finalize_options(self): ret = InstallCommandBase.finalize_options(self) self.install_lib = self.install_platlib - self.install_headers = 
os.path.join(self.install_platlib, 'paddle', - 'include') + self.install_headers = os.path.join(self.install_platlib, 'paddle', 'include') + return ret - class InstallHeaders(Command): """Override how headers are copied. """ -- GitLab