未验证 提交 4ce66826 编写于 作者: zhouweiwei2014's avatar zhouweiwei2014 提交者: GitHub

Support sccache to speed up compilation on Windows (#34019)

* Support sccache to speed up compilation on Windows

* Support sccache to speed up compilation on Windows
上级 36080ae8
......@@ -97,10 +97,6 @@ if(WIN32)
if (MSVC_STATIC_CRT)
message(STATUS "Use static C runtime time, refer to https://docs.microsoft.com/en-us/cpp/c-runtime-library/crt-library-features?view=vs-2019")
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /MTd")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /MT")
foreach(flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
......
# Compiler-cache setup.
#   * Non-Windows hosts: when ccache is found, install it as the launcher for
#     compile and link rules.
#   * Windows hosts with the Ninja generator: when sccache is found, install it
#     as the C / C++ / CUDA compiler launcher.
# Exactly one cache tool is configured per platform; ccache is never looked up
# on Windows, so it cannot end up wrapping the compiler together with sccache.
if(NOT WIN32)
  find_program(CCACHE_PATH ccache)
  if(CCACHE_PATH)
    # Run the binary located by find_program() instead of relying on a bare
    # "ccache" being resolvable again when the command executes.
    execute_process(COMMAND "${CCACHE_PATH}" -V OUTPUT_VARIABLE ccache_output)
    execute_process(COMMAND "${CCACHE_PATH}" -s cache directory OUTPUT_VARIABLE cache_directory)
    # The input is quoted so that an empty output still counts as an argument,
    # and the dot is escaped so it only matches a literal ".".
    string(REGEX MATCH "[0-9]+\\.[0-9]+" ccache_version "${ccache_output}")
    message(STATUS "ccache is founded, use ccache to speed up compile on Unix.")
    # show statistics summary of ccache
    message("ccache version\t\t\t " ${ccache_version} "\n" ${cache_directory})
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_PATH})
    set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_PATH})
  endif()
elseif("${CMAKE_GENERATOR}" STREQUAL "Ninja")
  # (Note:zhouwei25) Only Ninja Generator can support sccache now
  find_program(SCCACHE_PATH sccache)
  if(SCCACHE_PATH)
    # Strip the trailing newline so the status message stays on one line.
    execute_process(COMMAND "${SCCACHE_PATH}" -V
                    OUTPUT_VARIABLE sccache_version
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
    message(STATUS "${sccache_version} is founded, use [${SCCACHE_PATH}] to speed up compile on Windows.")
    set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_PATH})
    set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_PATH})
    # (Note:zhouwei25) sccache has a reported problem with the CUDA compiler
    # that prevents cache hits, see https://github.com/mozilla/sccache/issues/1017.
    # NOTE(review): the original note says "so we fix it"; presumably the
    # sccache binary used by CI carries that fix -- confirm.
    set(CMAKE_CUDA_COMPILER_LAUNCHER ${SCCACHE_PATH})
  endif()
endif()
......@@ -218,8 +218,6 @@ if(WIN32)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler \"/wd4244 /wd4267 /wd4819 \"")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler /bigobj")
if(MSVC_STATIC_CRT)
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -Xcompiler /MTd")
set(CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -Xcompiler /MT")
foreach(flag_var
CMAKE_CUDA_FLAGS CMAKE_CUDA_FLAGS_DEBUG CMAKE_CUDA_FLAGS_RELEASE
CMAKE_CUDA_FLAGS_MINSIZEREL CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
......
......@@ -14,27 +14,27 @@
include(ExternalProject)
set(CUB_PREFIX_DIR ${THIRD_PARTY_PATH}/cub)
set(CUB_SOURCE_DIR ${THIRD_PARTY_PATH}/cub/src/extern_cub)
set(CUB_REPOSITORY ${GIT_URL}/NVlabs/cub.git)
set(CUB_TAG 1.8.0)
# Note(zhouwei): extern_cub uses __FILE__, so if the path of extern_cub changes,
# about 30+ .cu files will miss the sccache and compilation on Windows slows down.
# Therefore, a fixed CUB_PATH is passed in to increase the sccache hit rate.
set(CUB_PATH "${THIRD_PARTY_PATH}/cub" CACHE STRING "A path setting for external_cub path.")
set(CUB_PREFIX_DIR ${CUB_PATH})
cache_third_party(extern_cub
REPOSITORY ${CUB_REPOSITORY}
TAG ${CUB_TAG}
DIR CUB_SOURCE_DIR)
set(CUB_REPOSITORY ${GIT_URL}/NVlabs/cub.git)
set(CUB_TAG 1.8.0)
SET(CUB_INCLUDE_DIR ${CUB_SOURCE_DIR})
SET(CUB_INCLUDE_DIR ${CUB_PREFIX_DIR}/src/extern_cub)
message("CUB_INCLUDE_DIR is ${CUB_INCLUDE_DIR}")
include_directories(${CUB_INCLUDE_DIR})
ExternalProject_Add(
extern_cub
${EXTERNAL_PROJECT_LOG_ARGS}
${SHALLOW_CLONE}
"${CUB_DOWNLOAD_CMD}"
GIT_REPOSITORY ${CUB_REPOSITORY}
GIT_TAG ${CUB_TAG}
PREFIX ${CUB_PREFIX_DIR}
SOURCE_DIR ${CUB_SOURCE_DIR}
UPDATE_COMMAND ""
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
......
......@@ -28,7 +28,7 @@ add_subdirectory(io)
#ddim lib
proto_library(framework_proto SRCS framework.proto)
proto_library(op_def_proto SRCS op_def.proto)
proto_library(op_def_proto SRCS op_def.proto DEPS framework_proto)
cc_library(op_def_api SRCS op_def_api.cc DEPS op_def_proto boost)
FILE(GLOB OP_DEF_FILES ${PADDLE_SOURCE_DIR}/paddle/fluid/operators/compat/*.pbtxt)
......
......@@ -26,12 +26,10 @@ extra {
attrs {
name: "x_data_format"
type: STRING
# no longer to use
}
attrs {
name: "y_data_format"
type: STRING
# no longer to use
}
attrs {
name: "Scale_x"
......
......@@ -67,6 +67,7 @@ if not defined WITH_STATIC_LIB set WITH_STATIC_LIB=ON
if not defined WITH_TPCACHE set WITH_TPCACHE=OFF
if not defined WITH_CLCACHE set WITH_CLCACHE=OFF
if not defined WITH_CACHE set WITH_CACHE=OFF
if not defined WITH_SCCACHE set WITH_SCCACHE=OFF
if not defined WITH_UNITY_BUILD set WITH_UNITY_BUILD=OFF
if not defined INFERENCE_DEMO_INSTALL_DIR set INFERENCE_DEMO_INSTALL_DIR=%cache_dir:\=/%/inference_demo
if not defined LOG_LEVEL set LOG_LEVEL=normal
......@@ -75,7 +76,21 @@ if not defined NIGHTLY_MODE set PRECISION_TEST=OFF
if not defined retry_times set retry_times=3
if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37
rem -------set cache build directory-----------
rem ------initialize the python environment------
rem Select the interpreter under PYTHON_ROOT and put its Scripts directory and
rem its root directory at the front of PATH.
set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe
set PATH=%PYTHON_ROOT%\Scripts;%PYTHON_ROOT%;%PATH%
rem When WITH_PYTHON is ON: print which python and pip are resolved, then install
rem wheel and the packages listed in python\requirements.txt with --user.
if "%WITH_PYTHON%" == "ON" (
where python
where pip
pip install wheel --user
pip install -r %work_dir%\python\requirements.txt --user
rem Only the result of the requirements install just above is checked; a failure
rem aborts the script with exit code 7.
rem NOTE(review): the delayed-expansion form of ERRORLEVEL assumes delayed
rem expansion was enabled earlier in the script, which is not visible here.
if !ERRORLEVEL! NEQ 0 (
echo pip install requirements.txt failed!
exit /b 7
)
)
rem -------Caching strategy 1: keep build directory for incremental compilation-----------
rmdir build\python /s/q
rmdir build\paddle\third_party\externalError /s/q
rem rmdir build\paddle\fluid\pybind /s/q
......@@ -123,12 +138,6 @@ if %day_now% NEQ %day_before% (
goto :mkbuild
)
:: git diff HEAD origin/develop --stat --name-only
:: git diff HEAD origin/develop --stat --name-only | findstr ".cmake CMakeLists.txt paddle_build.bat"
:: if %ERRORLEVEL% EQU 0 (
:: rmdir build /s/q
:: )
:mkbuild
if not exist build (
echo Windows build cache FALSE
......@@ -143,8 +152,33 @@ cd /d build
dir .
dir %cache_dir%
dir paddle\fluid\pybind\Release
rem -------Caching strategy 1: End --------------------------------
rem -------Caching strategy 2: sccache decorate compiler-----------
rem When WITH_SCCACHE is ON, restart the sccache server with the settings below;
rem otherwise delete sccache.exe from PYTHON_ROOT. Either way execution then
rem jumps to the CASE_ label named by the first script argument.
if "%WITH_SCCACHE%"=="ON" (
rem cmd /C sccache -V || call :install_sccache
rem Stop any server that is already running; its error output is discarded.
sccache --stop-server 2> NUL
if not exist D:\sccache mkdir D:\sccache
rem Environment read by sccache: cache location, maximum cache size, error log
rem file and log level. All files are kept under D:\sccache.
set SCCACHE_DIR=D:\sccache\.cache
set SCCACHE_CACHE_SIZE=30G
set SCCACHE_ERROR_LOG=D:\sccache\sccache_log.txt
set SCCACHE_LOG=quiet
sccache --start-server
rem Abort the script with exit code 1 when the server could not be started.
if !errorlevel! NEQ 0 exit /b 1
rem Zero the statistics counters; presumably so that the hit rate collected
rem later covers only this build -- confirm.
sccache -z
goto :CASE_%1
) else (
rem NOTE(review): presumably removes the binary so that the CMake side cannot
rem find sccache when caching is disabled -- confirm.
del %PYTHON_ROOT%\sccache.exe
goto :CASE_%1
)
rem Both branches above already end with the same goto, so this line is only a
rem fallback.
goto :CASE_%1
rem Subroutine: download sccache.exe with the Python wget module into the current
rem directory, copy it into the Scripts directory under PYTHON_ROOT, and return
rem to the caller.
rem NOTE(review): the only visible call site is commented out.
:install_sccache
echo There is not sccache in this PC, will install sccache.
echo Download package from https://paddle-ci.gz.bcebos.com/window_requirement/sccache.exe
%PYTHON_ROOT%\python.exe -c "import wget;wget.download('https://paddle-ci.gz.bcebos.com/window_requirement/sccache.exe')"
xcopy sccache.exe %PYTHON_ROOT%\Scripts\ /Y
goto:eof
rem -------Caching strategy 2: End --------------------------------
echo "Usage: paddle_build.bat [OPTION]"
echo "OPTION:"
......@@ -266,22 +300,7 @@ rem ------show summary of current GPU environment----------
cmake --version
if "%WITH_GPU%"=="ON" (
nvcc --version
nvidia-smi
)
rem ------initialize the python environment------
@ECHO OFF
set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe
set PATH=%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH%
if "%WITH_PYTHON%" == "ON" (
where python
where pip
pip install wheel --user
pip install -r %work_dir%\python\requirements.txt --user
if !ERRORLEVEL! NEQ 0 (
echo pip install requirements.txt failed!
exit /b 7
)
nvidia-smi 2>NUL
)
rem ------pre install clcache and init config----------
......@@ -333,10 +352,11 @@ echo echo ${md5_content}^>md5.txt >> cache.sh
set /p md5=< md5.txt
rem Choose the third-party cache root for this build flavour (GPU or CPU), using
rem forward slashes, then derive the md5-specific directory below it.
rem THIRD_PARTY_PATH is computed once, after the branch, instead of in each
rem branch and then again afterwards.
if "%WITH_GPU%"=="ON" (
    set THIRD_PARTY_HOME=%cache_dir:\=/%/third_party_GPU
) else (
    set THIRD_PARTY_HOME=%cache_dir:\=/%/third_party
)
set THIRD_PARTY_PATH=%THIRD_PARTY_HOME%/%md5%
:cmake_impl
echo cmake .. -G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^
......@@ -344,14 +364,14 @@ echo cmake .. -G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -D
-DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^
-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^
-DWITH_TENSORRT=%WITH_TENSORRT% -DTENSORRT_ROOT="%TENSORRT_ROOT%" -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% ^
-DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME%
-DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME% -DCUB_PATH=%THIRD_PARTY_HOME%/cub
cmake .. -G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% ^
-DWITH_TESTING=%WITH_TESTING% -DWITH_PYTHON=%WITH_PYTHON% -DPYTHON_EXECUTABLE=%PYTHON_EXECUTABLE% -DON_INFER=%ON_INFER% ^
-DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^
-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^
-DWITH_TENSORRT=%WITH_TENSORRT% -DTENSORRT_ROOT="%TENSORRT_ROOT%" -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% ^
-DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME%
-DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME% -DCUB_PATH=%THIRD_PARTY_HOME%/cub
goto:eof
:cmake_error
......@@ -454,7 +474,9 @@ echo 0 > %cache_dir%\error_code.txt
type %cache_dir%\error_code.txt
:: ci will collect clcache hit rate
rem goto :collect_clcache_hits
if "%WITH_SCCACHE%"=="ON" (
call :collect_sccache_hits
)
goto:eof
......@@ -666,7 +688,7 @@ echo cmake .. -G %GENERATOR% -DCMAKE_BUILD_TYPE=Release -DWITH_AVX=%WITH_AVX% -D
-DWITH_INFERENCE_API_TEST=%WITH_INFERENCE_API_TEST% -DTHIRD_PARTY_PATH=%THIRD_PARTY_PATH% ^
-DINFERENCE_DEMO_INSTALL_DIR=%INFERENCE_DEMO_INSTALL_DIR% -DWITH_STATIC_LIB=%WITH_STATIC_LIB% ^
-DWITH_TENSORRT=%WITH_TENSORRT% -DTENSORRT_ROOT="%TENSORRT_ROOT%" -DMSVC_STATIC_CRT=%MSVC_STATIC_CRT% ^
-DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME% >> check_change_of_unittest.sh
-DWITH_UNITY_BUILD=%WITH_UNITY_BUILD% -DCUDA_ARCH_NAME=%CUDA_ARCH_NAME% >> check_change_of_unittest.sh
echo cat ^<^<EOF>> check_change_of_unittest.sh
echo ============================================ >> check_change_of_unittest.sh
echo Generate unit tests.spec of develop. >> check_change_of_unittest.sh
......@@ -778,16 +800,22 @@ echo ipipe_log_param_Windows_%tempTaskName: =_%_Time: %cost_secs%s
goto:eof
rem Subroutine: report the clcache hit rate, computed as hits / entries from the
rem output of "clcache.exe -s".
rem The rate is read with delayed expansion because it is assigned inside the
rem same parenthesised block that prints it.
rem NOTE(review): the two branches emit different ipipe keys (Hit_Rate and
rem Hit_Hate); both are kept unchanged because the consumer of these keys is not
rem visible here.
:collect_clcache_hits
for /f "tokens=2,4" %%i in ('clcache.exe -s ^| findstr "entries hits"') do set %%i=%%j
if %hits% EQU 0 (
    echo "clcache hit rate: 0%%"
    echo ipipe_log_param_Clcache_Hit_Rate: 0%%
) else (
    set /a rate=%hits%*10000/%entries%
    echo "clcache hit rate: !rate:~0,-2!.!rate:~-2!%%"
    echo ipipe_log_param_Clcache_Hit_Hate: !rate:~0,-2!.!rate:~-2!%%
)
goto:eof

rem Subroutine: print the sccache statistics and report the overall hit rate,
rem computed as hits / requests with two decimal places.
rem   - The summary is written to sccache_summary.txt in the current directory.
rem   - requests and hits are cleared first and default to 0, so a failed or
rem     empty "sccache -s" reports 0 instead of breaking the comparison below.
rem   - The rate is split into an integer part and a zero-padded two-digit
rem     fraction so that values below 10 percent and exactly 100 percent print
rem     correctly; both echo lines use the same formatting.
rem NOTE(review): the ipipe key name is kept byte-for-byte, including the
rem "Clcache" and "Hate" spelling, because the consumer is not visible here.
:collect_sccache_hits
set "requests="
set "hits="
sccache -s > sccache_summary.txt
echo ========================================
echo sccache statistical summary ...
echo ========================================
type sccache_summary.txt
for /f "tokens=2,3" %%i in ('type sccache_summary.txt ^| findstr "requests hits" ^| findstr /V "executed C/C++ CUDA"') do set %%i=%%j
if not defined requests set requests=0
if not defined hits set hits=0
if %requests% EQU 0 (
    echo "sccache hit rate: 0%%"
    echo ipipe_log_param_Clcache_Hit_Hate: 0%%
) else (
    set /a "rate=hits*10000/requests"
    set /a "rate_whole=rate/100"
    set /a "rate_frac=rate %% 100"
    if !rate_frac! LSS 10 set rate_frac=0!rate_frac!
    echo "sccache hit rate: !rate_whole!.!rate_frac!%%"
    echo ipipe_log_param_Clcache_Hit_Hate: !rate_whole!.!rate_frac!%%
)
goto:eof
......
# Collect every test_*.py in this directory and turn each file name into a test
# module name by dropping the ".py" suffix.
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# Take these two tests out of the list consumed by the foreach() that follows.
# Each name is a properly closed quoted argument with no stray "}" after it.
list(REMOVE_ITEM TEST_OPS
     "test_fleet_with_asp"
     "test_fleet_with_asp_amp")
foreach(TEST_OP ${TEST_OPS})
py_test_modules(${TEST_OP} MODULES ${TEST_OP})
......
......@@ -26,7 +26,8 @@
:: 4. Visual Studio 2017 Community
:: 5. CUDA 11.2
:: 6. java jre
:: 7. xly agent
:: 7. sccache
:: 8. xly agent
:: Echo command is not required.
@echo off
......@@ -34,7 +35,7 @@ cd /d %~dp0%
:: ===== start step 0: wget tool =====
:: Download wget for windows when there is not wget tool.
echo ">>>>>>>> step [0/7]: wget tool"
echo ">>>>>>>> step [0/8]: wget tool"
wget --help > nul 2> nul || call:install_wget
goto cmake
......@@ -55,7 +56,7 @@ goto :eof
:: Download CMake-3.17.0 and add in PATH when it not installed.
:: TODO: limit version >= 3.17.0
:cmake
echo ">>>>>>>> step [1/7]: CMake 3.17.0"
echo ">>>>>>>> step [1/8]: CMake 3.17.0"
cmake --help > nul 2> nul || call :install_cmake
goto git
......@@ -105,7 +106,7 @@ goto :eof
:: Download Python-3.8.3 and add in PATH when it not installed.
:: TODO: limit version >= 3.8.3
:python
echo ">>>>>>>> step [3/7]: Python 3.8.3"
echo ">>>>>>>> step [3/8]: Python 3.8.3"
python -V 2>&1 | findstr /C:"Python 3.8.3" > nul 2> nul || call :install_python
goto vs
......@@ -130,7 +131,7 @@ goto :eof
:: ===== start step 4: Visual Studio 2017 Community =====
:: Download Visual Studio 2017 when it not installed.
:vs
echo ">>>>>>>> step [4/7]: Visual Studio 2017 "
echo ">>>>>>>> step [4/8]: Visual Studio 2017 "
cmd /C "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvars64.bat" > nul 2> nul || call :install_visual_studio
goto :cuda
......@@ -154,7 +155,7 @@ goto :eof
:: ===== start step 5: CUDA 11 =====
:cuda
echo ">>>>>>>> step [5/7]: CUDA 11.2"
echo ">>>>>>>> step [5/8]: CUDA 11.2"
cmd /C nvcc --version 2> nul | findstr /C:"11.2" > nul 2> nul || call :install_cuda
goto java-jre
......@@ -172,6 +173,7 @@ if %errorlevel% == 0 (
goto :eof
)
del cuda_installer.exe
echo Download cudnn from "https://paddle-ci.gz.bcebos.com/window_requirement/cudnn-11.2-windows-x64-v8.1.0.77.zip"
wget -O cudnn-11.2-windows-x64-v8.1.0.77.zip "https://paddle-ci.gz.bcebos.com/window_requirement/cudnn-11.2-windows-x64-v8.1.0.77.zip"
tar xf cudnn-11.2-windows-x64-v8.1.0.77.zip
......@@ -184,9 +186,9 @@ goto :eof
:: ===== start step 6: java jre =====
:java-jre
echo ">>>>>>>> step [6/7]: java jre"
echo ">>>>>>>> step [6/8]: java jre"
cmd /C java -version > nul 2> nul || call :install_java
goto xly-agent
goto sccache
:install_java
echo There is not java-jre in this PC, will install java-jre.
......@@ -204,9 +206,22 @@ del jre-8u261-windows-x64.exe
goto :eof
:: ===== end step 6: java jre =====
:: ===== start step 7: xly agent =====
:: ===== start step 7: sccache on windows =====
:: Step 7: check that sccache can be run, download it if not, then continue
:: with the xly agent step.
:sccache
echo ">>>>>>>> step [7/8]: sccache"
cmd /C sccache -V > nul 2> nul || call :download_sccache
goto xly-agent

:: Subroutine: download sccache.exe with wget and copy it into C:\Python38.
:: It is invoked with "call", so it ends with "goto :eof"; without that,
:: execution would fall through into the :xly-agent step before returning to
:: the caller, which then jumps to :xly-agent a second time.
:download_sccache
echo There is not sccache in this PC, will install sccache.
echo Download package from https://paddle-ci.gz.bcebos.com/window_requirement/sccache.exe
wget -O sccache.exe "https://paddle-ci.gz.bcebos.com/window_requirement/sccache.exe"
copy sccache.exe C:\Python38 /Y
goto :eof
:: ===== end step 7: sccache on windows =====
:: ===== start step 8: xly agent =====
:xly-agent
echo ">>>>>>>> step [7/7]: xly agent"
echo ">>>>>>>> step [8/8]: xly agent"
wget -O agent.jar "https://xly.bce.baidu.com/sa_server/agent/v1/download?version=1.2.8"
:: ===== end step 8: xly agent =====
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册