未验证 提交 3ba8c48a 编写于 作者: Z Zhou Wei 提交者: GitHub

[CHERRY-PICK2.1]Remove paddle_custom_op dynamic libraries, and link to...

 [CHERRY-PICK2.1]Remove paddle_custom_op dynamic libraries, and link to FLUID_CORE on windows (#32583) (#32769)

* Remove paddle_custom_op dynamic libraries, change link to FLUID_CORE on windows, and check copy_to

* fix CI
上级 70e0e3d5
...@@ -369,36 +369,3 @@ cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES}) ...@@ -369,36 +369,3 @@ cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES})
if(WITH_TESTING AND TEST selected_rows_test) if(WITH_TESTING AND TEST selected_rows_test)
set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120) set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120)
endif() endif()
##### 2.0 New custom op extension mechanism related #####
# Windows only: build the custom-operator sources into a separate shared
# library named paddle_custom_op, and record the expected locations of its
# import library (.lib) and DLL in internal cache variables.
# `layer` must be listed as a dependency; without it the link fails with:
# undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
if (WIN32)
# Modules passed as DEPS to the shared-library target below.
set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)
# Sources compiled directly into the shared library.
set(PADDLE_CUSTOM_OP_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/custom_operator.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_tensor.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_op_meta_info.cc
${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc)
# Re-export the source list so the parent directory scope can read it.
set(PADDLE_CUSTOM_OP_SRCS ${PADDLE_CUSTOM_OP_SRCS} PARENT_SCOPE)
# cc_library is a project-defined helper; presumably it wraps add_library -- TODO confirm.
cc_library(paddle_custom_op_shared
SHARED SRCS ${PADDLE_CUSTOM_OP_SRCS} DEPS ${PADDLE_CUSTOM_OP_MODULES})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
# The target is called paddle_custom_op_shared but the files it produces are
# named paddle_custom_op.*
set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op)
target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})
# Ninja writes outputs straight into the binary dir; for every other generator
# a per-configuration subdirectory is assumed.
# NOTE(review): CMAKE_BUILD_TYPE is empty under multi-config generators unless
# it is passed explicitly -- confirm the build scripts always set it.
if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
set(paddle_custom_op_lib_path ${CMAKE_CURRENT_BINARY_DIR})
else()
set(paddle_custom_op_lib_path ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE})
endif()
# Expected paths of the produced import library and DLL, stored in the cache
# so they are visible outside this directory.
set(PADDLE_CUSTOM_OP_IMPORT_LIB
${paddle_custom_op_lib_path}/paddle_custom_op.lib
CACHE INTERNAL "Paddle custom op import lib")
set(PADDLE_CUSTOM_OP_SHARED_LIB
${paddle_custom_op_lib_path}/paddle_custom_op.dll
CACHE INTERNAL "Paddle custom op dll")
endif()
...@@ -61,7 +61,7 @@ if not defined WITH_GPU set WITH_GPU=ON ...@@ -61,7 +61,7 @@ if not defined WITH_GPU set WITH_GPU=ON
if not defined WITH_MKL set WITH_MKL=ON if not defined WITH_MKL set WITH_MKL=ON
if not defined WITH_AVX set WITH_AVX=ON if not defined WITH_AVX set WITH_AVX=ON
if not defined WITH_TESTING set WITH_TESTING=ON if not defined WITH_TESTING set WITH_TESTING=ON
if not defined MSVC_STATIC_CRT set MSVC_STATIC_CRT=OFF if not defined MSVC_STATIC_CRT set MSVC_STATIC_CRT=ON
if not defined WITH_PYTHON set WITH_PYTHON=ON if not defined WITH_PYTHON set WITH_PYTHON=ON
if not defined ON_INFER set ON_INFER=ON if not defined ON_INFER set ON_INFER=ON
if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON
...@@ -75,6 +75,7 @@ if not defined LOG_LEVEL set LOG_LEVEL=normal ...@@ -75,6 +75,7 @@ if not defined LOG_LEVEL set LOG_LEVEL=normal
if not defined PRECISION_TEST set PRECISION_TEST=OFF if not defined PRECISION_TEST set PRECISION_TEST=OFF
if not defined NIGHTLY_MODE set PRECISION_TEST=OFF if not defined NIGHTLY_MODE set PRECISION_TEST=OFF
if not defined retry_times set retry_times=2 if not defined retry_times set retry_times=2
if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37
rem -------set cache build directory----------- rem -------set cache build directory-----------
rmdir build\python /s/q rmdir build\python /s/q
...@@ -83,9 +84,6 @@ rmdir build\paddle_inference_install_dir /s/q ...@@ -83,9 +84,6 @@ rmdir build\paddle_inference_install_dir /s/q
rmdir build\paddle_inference_c_install_dir /s/q rmdir build\paddle_inference_c_install_dir /s/q
del build\CMakeCache.txt del build\CMakeCache.txt
: set CI_SKIP_CPP_TEST if only *.py changed
git diff --name-only %BRANCH% | findstr /V "\.py" || set CI_SKIP_CPP_TEST=ON
if "%WITH_CACHE%"=="OFF" ( if "%WITH_CACHE%"=="OFF" (
rmdir build /s/q rmdir build /s/q
goto :mkbuild goto :mkbuild
...@@ -135,58 +133,6 @@ dir . ...@@ -135,58 +133,6 @@ dir .
dir %cache_dir% dir %cache_dir%
dir paddle\fluid\pybind\Release dir paddle\fluid\pybind\Release
rem ------initialize the python environment------
rem Use C:\Python37 unless the caller already chose a Python root, then put
rem that interpreter and its Scripts directory first on PATH.
if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37
set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe
set PATH=%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH%
rem TODO: a virtual environment cannot be deleted safely because some
rem processes do not exit when the task is canceled, so the system Python
rem environment is used for now. The disabled virtualenv setup is kept below.
rem %PYTHON_EXECUTABLE% -m pip install virtualenv
rem %PYTHON_EXECUTABLE% -m virtualenv paddle_winci
rem call paddle_winci\Scripts\activate.bat
rem ------pre install python requirement----------
rem Print which python and pip are picked up from PATH, then install wheel
rem and the project requirements into the user site.
where python
where pip
pip install wheel --user
pip install -r %work_dir%\python\requirements.txt --user
rem Abort the script with exit code 7 when the requirements install fails.
if %ERRORLEVEL% NEQ 0 (
echo pip install requirements.txt failed!
exit /b 7
)
rem ------pre install clcache and init config----------
rem clcache is currently disabled: it is uninstalled here, and the install
rem command and settings below are kept commented out for reference.
rem pip install clcache --user
pip uninstall -y clcache
rem Set USE_CLCACHE to enable clcache.
rem set USE_CLCACHE=1
rem In some scenarios, CLCACHE_HARDLINK can save one file copy.
rem set CLCACHE_HARDLINK=1
rem Report an error if obtaining the right to use the cache takes more than 1000 seconds.
rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
rem Set the maximum cache size to 20 GiB.
rem clcache.exe -M 21474836480
rem Install ninja only when the Ninja generator was requested.
rem GENERATOR is compared against a quoted literal, so its value is expected
rem to carry its own double quotes -- TODO confirm where GENERATOR is set.
rem The failure branch is chained to the pip command itself. Reading the
rem errorlevel variable inside a parenthesized block does not work without
rem delayed expansion, because the variable is substituted when the whole
rem block is parsed, which is before pip has run.
if %GENERATOR% == "Ninja" (
    pip install ninja || (
        echo pip install ninja failed!
        exit /b 7
    )
)
rem ------show summary of current environment----------
rem Record the toolchain in the log: the CMake version always, and for GPU
rem builds also the output of nvcc --version and nvidia-smi.
cmake --version
if "%WITH_GPU%"=="ON" (
nvcc --version
nvidia-smi
)
rem Additional environment reports, currently disabled:
::python %work_dir%\tools\summary_env.py
::%cache_dir%\tools\busybox64.exe bash %work_dir%\tools\get_cpu_info.sh
goto :CASE_%1 goto :CASE_%1
echo "Usage: paddle_build.bat [OPTION]" echo "Usage: paddle_build.bat [OPTION]"
...@@ -266,8 +212,10 @@ rem "Other configurations are added here" ...@@ -266,8 +212,10 @@ rem "Other configurations are added here"
rem :CASE_wincheck_others rem :CASE_wincheck_others
rem call ... rem call ...
rem --------------------------------------------------------------------------------------------- rem ---------------------------------------------------------------------------------------------
:cmake :cmake
@ECHO OFF
echo ======================================== echo ========================================
echo Step 1. Cmake ... echo Step 1. Cmake ...
echo ======================================== echo ========================================
...@@ -281,12 +229,52 @@ set PATH=C:\Program Files (x86)\Windows Kits\10\bin\10.0.17763.0\x64;%PATH% ...@@ -281,12 +229,52 @@ set PATH=C:\Program Files (x86)\Windows Kits\10\bin\10.0.17763.0\x64;%PATH%
for /F %%# in ('wmic os get localdatetime^|findstr 20') do set start=%%# for /F %%# in ('wmic os get localdatetime^|findstr 20') do set start=%%#
set start=%start:~4,10% set start=%start:~4,10%
@ECHO ON if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2
if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0
set PATH=%TENSORRT_ROOT:/=\%\lib;%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH% set PATH=%TENSORRT_ROOT:/=\%\lib;%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH%
rem ------set third_party cache dir------ rem install ninja if GENERATOR is Ninja
rem Install ninja only when the Ninja generator was requested.
rem GENERATOR is compared against a quoted literal, so its value is expected
rem to carry its own double quotes -- TODO confirm where GENERATOR is set.
rem The failure branch is chained to the pip command itself. Reading the
rem errorlevel variable inside a parenthesized block does not work without
rem delayed expansion, because the variable is substituted when the whole
rem block is parsed, which is before pip has run.
if %GENERATOR% == "Ninja" (
    pip install ninja || (
        echo pip install ninja failed!
        exit /b 7
    )
)
rem ------show summary of current GPU environment----------
rem Record the toolchain in the log: the CMake version always, and for GPU
rem builds also the output of nvcc --version and nvidia-smi.
cmake --version
if "%WITH_GPU%"=="ON" (
nvcc --version
nvidia-smi
)
rem ------initialize the python environment------
rem Put the selected interpreter and its Scripts directory first on PATH.
set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe
set PATH=%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH%
rem Pre-install the Python build requirements when Python support is enabled.
rem Both sides of the comparison are quoted: WITH_PYTHON holds a bare ON or OFF,
rem so comparing the bare value with a quoted literal can never match.
rem The previous test against OFF therefore never ran this step at all.
rem NOTE(review): the step is assumed to be wanted for WITH_PYTHON=ON -- confirm.
rem The failure branch is chained to the pip command because the errorlevel
rem variable inside a parenthesized block is substituted at parse time.
if "%WITH_PYTHON%"=="ON" (
    where python
    where pip
    pip install wheel --user
    pip install -r %work_dir%\python\requirements.txt --user || (
        echo pip install requirements.txt failed!
        exit /b 7
    )
)
rem ------pre install clcache and init config----------
rem clcache is currently disabled: it is uninstalled here, and the install
rem command and settings below are kept commented out for reference.
rem pip install clcache --user
pip uninstall -y clcache
rem Set USE_CLCACHE to enable clcache.
rem set USE_CLCACHE=1
rem In some scenarios, CLCACHE_HARDLINK can save one file copy.
rem set CLCACHE_HARDLINK=1
rem Report an error if obtaining the right to use the cache takes more than 1000 seconds.
rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
rem Set the maximum cache size to 20 GiB.
rem clcache.exe -M 21474836480
rem ------set third_party cache dir------
: clear third party cache every once in a while : clear third party cache every once in a while
for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%# for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%#
set day_now=%datetime:~6,2% set day_now=%datetime:~6,2%
...@@ -500,6 +488,10 @@ echo ======================================== ...@@ -500,6 +488,10 @@ echo ========================================
echo Step 4. Running unit tests ... echo Step 4. Running unit tests ...
echo ======================================== echo ========================================
: set CI_SKIP_CPP_TEST if only *.py changed
git diff --name-only %BRANCH% | findstr /V "\.py" || set CI_SKIP_CPP_TEST=ON
pip install -r %work_dir%\python\unittest_py\requirements.txt --user pip install -r %work_dir%\python\unittest_py\requirements.txt --user
if %ERRORLEVEL% NEQ 0 ( if %ERRORLEVEL% NEQ 0 (
echo pip install unittest requirements.txt failed! echo pip install unittest requirements.txt failed!
......
...@@ -43,9 +43,20 @@ set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/) ...@@ -43,9 +43,20 @@ set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/)
IF(WIN32) IF(WIN32)
# Python would use the .pyd by default under Windows series platform # Python would use the .pyd by default under Windows series platform
set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.pyd) set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.pyd)
# Import library of the pybind module, copied next to the .pyd by the rule below.
set(FLUID_CORE_LIB ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.lib)
# Copy the built pybind module and its import library into the Python package
# tree. ${CMAKE_COMMAND} is used so the copy runs with the same CMake that
# configured the build rather than whichever `cmake` is first on PATH.
# The import library is declared as a byproduct so the build tool knows this
# rule creates it.
add_custom_command(
  OUTPUT ${FLUID_CORE}
  BYPRODUCTS ${FLUID_CORE_LIB}
  COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
  COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_LINKER_FILE:paddle_pybind> ${FLUID_CORE_LIB}
  DEPENDS paddle_pybind
  VERBATIM)
set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.pyd) set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.pyd)
ELSE() ELSE()
set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.so) set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.so)
# Copy the built pybind module into the Python package tree.
# ${CMAKE_COMMAND} is used so the copy runs with the same CMake that configured
# the build rather than whichever `cmake` is first on PATH.
add_custom_command(
  OUTPUT ${FLUID_CORE}
  COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
  DEPENDS paddle_pybind
  VERBATIM)
set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.so) set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.so)
ENDIF() ENDIF()
...@@ -68,9 +79,6 @@ if(HAS_NOAVX_CORE AND EXISTS "${NOAVX_CORE_FILE}") ...@@ -68,9 +79,6 @@ if(HAS_NOAVX_CORE AND EXISTS "${NOAVX_CORE_FILE}")
list(APPEND FLUID_CORE_DEPS ${FLUID_NOAVX_CORE}) list(APPEND FLUID_CORE_DEPS ${FLUID_NOAVX_CORE})
endif() endif()
# Copy the built pybind module into the Python package tree.
# ${CMAKE_COMMAND} is used so the copy runs with the same CMake that configured
# the build rather than whichever `cmake` is first on PATH.
add_custom_command(
  OUTPUT ${FLUID_CORE}
  COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
  DEPENDS paddle_pybind
  VERBATIM)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE_DEPS}) add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE_DEPS})
IF(WIN32) IF(WIN32)
...@@ -84,6 +92,7 @@ ELSE(WIN32) ...@@ -84,6 +92,7 @@ ELSE(WIN32)
COMMAND touch stub.cc COMMAND touch stub.cc
COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMENT "Packing whl packages------>>>"
DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES}) DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES})
ENDIF() ENDIF()
......
...@@ -24,6 +24,6 @@ def check_import_scipy(OsName): ...@@ -24,6 +24,6 @@ def check_import_scipy(OsName):
if 'DLL load failed' in print_info: if 'DLL load failed' in print_info:
raise ImportError( raise ImportError(
print_info + print_info +
"\nplease download visual C++ Redistributable for vs 2015, https://www.microsoft.com/en-us/download/details.aspx?id=48145" "\nplease download Visual C++ Redistributable from https://support.microsoft.com/en-us/topic/the-latest-supported-visual-c-downloads-2647da03-1eea-4433-9aff-95f26a218cc0"
) )
return return
...@@ -37,7 +37,10 @@ if os.path.exists(current_path + os.sep + 'core_noavx.' + core_suffix): ...@@ -37,7 +37,10 @@ if os.path.exists(current_path + os.sep + 'core_noavx.' + core_suffix):
try: try:
if os.name == 'nt': if os.name == 'nt':
third_lib_path = current_path + os.sep + '..' + os.sep + 'libs' third_lib_path = current_path + os.sep + '..' + os.sep + 'libs'
os.environ['path'] = third_lib_path + ';' + os.environ['path'] # Will load shared library from 'path' on windows
os.environ[
'path'] = current_path + ';' + third_lib_path + ';' + os.environ[
'path']
sys.path.insert(0, third_lib_path) sys.path.insert(0, third_lib_path)
# Note: from python3.8, PATH will not take effect # Note: from python3.8, PATH will not take effect
# https://github.com/python/cpython/pull/12302 # https://github.com/python/cpython/pull/12302
...@@ -298,7 +301,7 @@ if avx_supported(): ...@@ -298,7 +301,7 @@ if avx_supported():
"WARNING: AVX is supported on local machine, but you have installed " "WARNING: AVX is supported on local machine, but you have installed "
"paddlepaddle without avx core. Hence, no_avx core which has worse " "paddlepaddle without avx core. Hence, no_avx core which has worse "
"preformance will be imported.\nYou could reinstall paddlepaddle by " "preformance will be imported.\nYou could reinstall paddlepaddle by "
"'python -m pip install -U paddlepaddle-gpu[==version]' or rebuild " "'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' or rebuild "
"paddlepaddle WITH_AVX=ON to get better performance.\n" "paddlepaddle WITH_AVX=ON to get better performance.\n"
"The original error is: %s\n" % cpt.get_exception_message(e)) "The original error is: %s\n" % cpt.get_exception_message(e))
load_noavx = True load_noavx = True
...@@ -350,12 +353,19 @@ if load_noavx: ...@@ -350,12 +353,19 @@ if load_noavx:
sys.stderr.write( sys.stderr.write(
'Error: Can not import noavx core while this file exists: ' + 'Error: Can not import noavx core while this file exists: ' +
current_path + os.sep + 'core_noavx.' + core_suffix + '\n') current_path + os.sep + 'core_noavx.' + core_suffix + '\n')
elif avx_supported():
sys.stderr.write(
"Error: AVX is support on your machine, but you have installed "
"paddlepaddle without avx core, you should reinstall paddlepaddle by "
"'python -m pip install --force-reinstall paddlepaddle-gpu[==version]\n"
)
else: else:
sys.stderr.write( sys.stderr.write(
"Error: AVX is not support on your machine, but you have installed " "Error: AVX is not support on your machine, but you have installed "
"paddlepaddle with avx core, you should reinstall paddlepaddle by " "paddlepaddle without no_avx core, you should reinstall paddlepaddle by "
"'python -m pip install -U paddlepaddle-gpu[==version] -f " "'python -m pip install --force-reinstall paddlepaddle-gpu[==version] -f "
"https://paddlepaddle.org.cn/whl/stable_noavx.html'\n") "https://paddlepaddle.org.cn/whl/mkl/stable/noavx.html or "
"https://paddlepaddle.org.cn/whl/openblas/stable/noavx.html\n")
raise e raise e
......
# New custom OP can support Windows/Linux now # New custom OP can support Windows/Linux/Mac now
if(WITH_GPU OR APPLE) if(WITH_GPU OR APPLE)
# GPU custom op tests: compile both .cc and .cu file
py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py) py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py) py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
py_test(test_custom_relu_model SRCS test_custom_relu_model.py) py_test(test_custom_relu_model SRCS test_custom_relu_model.py)
......
...@@ -45,8 +45,12 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) { ...@@ -45,8 +45,12 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
int grid = (numel + block - 1) / block; int grid = (numel + block - 1) / block;
PD_DISPATCH_FLOATING_AND_HALF_TYPES( PD_DISPATCH_FLOATING_AND_HALF_TYPES(
x.type(), "relu_cuda_forward_kernel", ([&] { x.type(), "relu_cuda_forward_kernel", ([&] {
auto cpu_input = x.copy_to<data_t>(paddle::PlaceType::kCPU);
auto gpu_input = cpu_input.copy_to<data_t>(paddle::PlaceType::kGPU);
relu_cuda_forward_kernel<data_t><<<grid, block, 0, x.stream()>>>( relu_cuda_forward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel); gpu_input.data<data_t>(),
out.mutable_data<data_t>(x.place()),
numel);
})); }));
return {out}; return {out};
......
...@@ -64,6 +64,7 @@ class TestCheckCompiler(TestABIBase): ...@@ -64,6 +64,7 @@ class TestCheckCompiler(TestABIBase):
# clear environ # clear environ
self.del_environ() self.del_environ()
compiler = 'python' # fake wrong compiler compiler = 'python' # fake wrong compiler
if not utils.IS_WINDOWS:
with warnings.catch_warnings(record=True) as error: with warnings.catch_warnings(record=True) as error:
flag = utils.check_abi_compatibility(compiler, verbose=True) flag = utils.check_abi_compatibility(compiler, verbose=True)
# check return False # check return False
...@@ -73,6 +74,20 @@ class TestCheckCompiler(TestABIBase): ...@@ -73,6 +74,20 @@ class TestCheckCompiler(TestABIBase):
self.assertTrue( self.assertTrue(
"Compiler Compatibility WARNING" in str(error[0].message)) "Compiler Compatibility WARNING" in str(error[0].message))
def test_exception_windows(self):
    """On Windows, a bogus compiler command must fail the ABI check with exactly one warning."""
    # Start from a clean environment.
    self.del_environ()
    if not utils.IS_WINDOWS:
        # The assertions below only apply on Windows.
        return

    fake_command = 'fake compiler'  # fake command
    with warnings.catch_warnings(record=True) as caught:
        is_compatible = utils.check_abi_compatibility(
            fake_command, verbose=True)
        # The check must report incompatibility ...
        self.assertFalse(is_compatible)
        # ... and emit a single warning explaining why.
        self.assertTrue(len(caught) == 1)
        warning_text = str(caught[0].message)
        self.assertTrue(
            "Failed to check compiler version for" in warning_text)
def test_exception_linux(self): def test_exception_linux(self):
# clear environ # clear environ
self.del_environ() self.del_environ()
......
...@@ -105,12 +105,12 @@ class TestJITLoad(unittest.TestCase): ...@@ -105,12 +105,12 @@ class TestJITLoad(unittest.TestCase):
in str(e)) in str(e))
if IS_WINDOWS: if IS_WINDOWS:
self.assertTrue( self.assertTrue(
r"python\paddle\fluid\tests\custom_op\custom_relu_op.cc:47" r"python\paddle\fluid\tests\custom_op\custom_relu_op.cc" in
in str(e)) str(e))
else: else:
self.assertTrue( self.assertTrue(
"python/paddle/fluid/tests/custom_op/custom_relu_op.cc:47" "python/paddle/fluid/tests/custom_op/custom_relu_op.cc" in
in str(e)) str(e))
self.assertTrue(caught_exception) self.assertTrue(caught_exception)
caught_exception = False caught_exception = False
...@@ -126,7 +126,7 @@ class TestJITLoad(unittest.TestCase): ...@@ -126,7 +126,7 @@ class TestJITLoad(unittest.TestCase):
"function \"relu_cuda_forward_kernel\" is not implemented for data type `int32_t`" "function \"relu_cuda_forward_kernel\" is not implemented for data type `int32_t`"
in str(e)) in str(e))
self.assertTrue( self.assertTrue(
"python/paddle/fluid/tests/custom_op/custom_relu_op.cu:50" in "python/paddle/fluid/tests/custom_op/custom_relu_op.cu" in
str(e)) str(e))
self.assertTrue(caught_exception) self.assertTrue(caught_exception)
......
...@@ -26,7 +26,7 @@ from .extension_utils import find_cuda_home, find_rocm_home, normalize_extension ...@@ -26,7 +26,7 @@ from .extension_utils import find_cuda_home, find_rocm_home, normalize_extension
from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags
from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile
from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from
from .extension_utils import clean_object_if_change_cflags, _reset_so_rpath from .extension_utils import clean_object_if_change_cflags, _reset_so_rpath, _get_fluid_path
from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat
from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS, MSVC_COMPILE_FLAGS from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS, MSVC_COMPILE_FLAGS
...@@ -69,7 +69,7 @@ def setup(**attr): ...@@ -69,7 +69,7 @@ def setup(**attr):
For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2, For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2,
then the version of user's local machine should satisfy GCC >= 8.2. then the version of user's local machine should satisfy GCC >= 8.2.
For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of
PaddlePaddle (Visual Studio 2015 update3). PaddlePaddle (Visual Studio 2017).
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility. occur because of ABI compatibility.
...@@ -79,7 +79,7 @@ def setup(**attr): ...@@ -79,7 +79,7 @@ def setup(**attr):
2. On Linux platform, we recommend to use GCC 8.2 as soft linking condidate of ``/usr/bin/cc`` . 2. On Linux platform, we recommend to use GCC 8.2 as soft linking condidate of ``/usr/bin/cc`` .
Then, Use ``which cc`` to ensure location of ``cc`` and using ``cc --version`` to ensure linking Then, Use ``which cc`` to ensure location of ``cc`` and using ``cc --version`` to ensure linking
GCC version. GCC version.
3. On Windows platform, we recommend to install `` Visual Studio`` (>=2015 update3). 3. On Windows platform, we recommend to install `` Visual Studio`` (>=2017).
Compared with Just-In-Time ``load`` interface, it only compiles once by executing Compared with Just-In-Time ``load`` interface, it only compiles once by executing
...@@ -611,7 +611,7 @@ class BuildExtension(build_ext, object): ...@@ -611,7 +611,7 @@ class BuildExtension(build_ext, object):
msg = ( msg = (
'It seems that the VC environment is activated but DISTUTILS_USE_SDK is not set.' 'It seems that the VC environment is activated but DISTUTILS_USE_SDK is not set.'
'This may lead to multiple activations of the VC env.' 'This may lead to multiple activations of the VC env.'
'Please set `DISTUTILS_USE_SDK=1` and try again.') 'Please run `set DISTUTILS_USE_SDK=1` and try again.')
raise UserWarning(msg) raise UserWarning(msg)
def _record_op_info(self): def _record_op_info(self):
...@@ -724,7 +724,7 @@ def load(name, ...@@ -724,7 +724,7 @@ def load(name,
processes under a individual subprocess. It does not require CMake or Ninja processes under a individual subprocess. It does not require CMake or Ninja
environment. On Linux platform, it requires GCC compiler whose version is environment. On Linux platform, it requires GCC compiler whose version is
greater than 5.4 and it should be soft linked to ``/usr/bin/cc`` . On Windows greater than 5.4 and it should be soft linked to ``/usr/bin/cc`` . On Windows
platform, it requires Visual Studio whose version is greater than 2015 update3. platform, it requires Visual Studio whose version is greater than 2017.
On MacOS, clang++ is requited. In addition, if compiling Operators supporting On MacOS, clang++ is requited. In addition, if compiling Operators supporting
GPU device, please make sure ``nvcc`` compiler is installed in local environment. GPU device, please make sure ``nvcc`` compiler is installed in local environment.
...@@ -735,7 +735,7 @@ def load(name, ...@@ -735,7 +735,7 @@ def load(name,
For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2, For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2,
then the version of user's local machine should satisfy GCC >= 8.2. then the version of user's local machine should satisfy GCC >= 8.2.
For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of
PaddlePaddle (Visual Studio 2015 update3). PaddlePaddle (Visual Studio 2017).
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility. occur because of ABI compatibility.
...@@ -749,7 +749,7 @@ def load(name, ...@@ -749,7 +749,7 @@ def load(name,
2. On Linux platform, we recommend to use GCC 8.2 as soft linking condidate of ``/usr/bin/cc`` . 2. On Linux platform, we recommend to use GCC 8.2 as soft linking condidate of ``/usr/bin/cc`` .
Then, Use ``which cc`` to ensure location of ``cc`` and using ``cc --version`` to ensure linking Then, Use ``which cc`` to ensure location of ``cc`` and using ``cc --version`` to ensure linking
GCC version. GCC version.
3. On Windows platform, we recommend to install `` Visual Studio`` (>=2015 update3). 3. On Windows platform, we recommend to install `` Visual Studio`` (>=2017).
**A simple example:** **A simple example:**
...@@ -802,9 +802,6 @@ def load(name, ...@@ -802,9 +802,6 @@ def load(name,
# ensure to use abs path # ensure to use abs path
build_directory = os.path.abspath(build_directory) build_directory = os.path.abspath(build_directory)
# Will load shared library from 'path' on windows
if IS_WINDOWS:
os.environ['path'] = build_directory + ';' + os.environ['path']
log_v("build_directory: {}".format(build_directory), verbose) log_v("build_directory: {}".format(build_directory), verbose)
...@@ -827,6 +824,7 @@ def load(name, ...@@ -827,6 +824,7 @@ def load(name,
# write setup.py file and compile it # write setup.py file and compile it
build_base_dir = os.path.join(build_directory, name) build_base_dir = os.path.join(build_directory, name)
_write_setup_file(name, sources, file_path, build_base_dir, _write_setup_file(name, sources, file_path, build_base_dir,
extra_include_paths, extra_cxx_cflags, extra_cuda_cflags, extra_include_paths, extra_cxx_cflags, extra_cuda_cflags,
extra_ldflags, verbose) extra_ldflags, verbose)
......
...@@ -55,7 +55,7 @@ CLANG_LINK_FLAGS = [ ...@@ -55,7 +55,7 @@ CLANG_LINK_FLAGS = [
'-dynamiclib', '-undefined', 'dynamic_lookup', '-arch', 'x86_64' '-dynamiclib', '-undefined', 'dynamic_lookup', '-arch', 'x86_64'
] ]
MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib'] MSVC_LINK_FLAGS = ['/MACHINE:X64']
COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU'] COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU']
...@@ -371,10 +371,11 @@ def _get_core_name(): ...@@ -371,10 +371,11 @@ def _get_core_name():
Return pybind DSO module name. Return pybind DSO module name.
""" """
import paddle import paddle
if paddle.fluid.core.load_noavx: ext_name = '.pyd' if IS_WINDOWS else '.so'
return 'core_noavx.so' if not paddle.fluid.core.load_noavx:
return 'core_avx' + ext_name
else: else:
return 'core_avx.so' return 'core_noavx' + ext_name
def _get_lib_core_path(): def _get_lib_core_path():
...@@ -386,6 +387,15 @@ def _get_lib_core_path(): ...@@ -386,6 +387,15 @@ def _get_lib_core_path():
return os.path.join(_get_fluid_path(), lib_core_name) return os.path.join(_get_fluid_path(), lib_core_name)
def _get_dll_core_path():
    """
    Return the path of ``paddle_pybind.dll`` inside the fluid directory on Windows.

    The file name is fixed and does not depend on which core (avx or noavx)
    is loaded.
    """
    return os.path.join(_get_fluid_path(), "paddle_pybind.dll")
def _reset_so_rpath(so_path): def _reset_so_rpath(so_path):
""" """
NOTE(Aurelius84): Runtime path of core_(no)avx.so is modified into `@loader_path/../libs` NOTE(Aurelius84): Runtime path of core_(no)avx.so is modified into `@loader_path/../libs`
...@@ -435,9 +445,12 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): ...@@ -435,9 +445,12 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
# append link flags # append link flags
extra_link_args = kwargs.get('extra_link_args', []) extra_link_args = kwargs.get('extra_link_args', [])
extra_link_args.extend(MSVC_LINK_FLAGS) extra_link_args.extend(MSVC_LINK_FLAGS)
lib_core_name = create_sym_link_if_not_exist()
extra_link_args.append('{}'.format(lib_core_name))
if use_cuda: if use_cuda:
extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib']) extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
kwargs['extra_link_args'] = extra_link_args kwargs['extra_link_args'] = extra_link_args
else: else:
########################### Linux Platform ########################### ########################### Linux Platform ###########################
extra_link_args = kwargs.get('extra_link_args', []) extra_link_args = kwargs.get('extra_link_args', [])
...@@ -481,13 +494,30 @@ def create_sym_link_if_not_exist(): ...@@ -481,13 +494,30 @@ def create_sym_link_if_not_exist():
""" """
Create soft symbol link of `core_avx.so` or `core_noavx.so` Create soft symbol link of `core_avx.so` or `core_noavx.so`
""" """
assert OS_NAME.startswith('darwin') assert OS_NAME.startswith('darwin') or IS_WINDOWS
raw_core_name = _get_core_name() raw_core_name = _get_core_name()
core_path = os.path.join(_get_fluid_path(), raw_core_name) core_path = os.path.join(_get_fluid_path(), raw_core_name)
new_lib_core_path = _get_lib_core_path() if IS_WINDOWS:
new_dll_core_path = _get_dll_core_path()
# create symbol link on windows
if not os.path.exists(new_dll_core_path):
try:
os.symlink(core_path, new_dll_core_path)
except Exception:
warnings.warn(
"Failed to create soft symbol link for {}.\n You can run prompt as administrator and execute the "
"following command manually: `mklink {} {}`. Now it will create hard link for {} trickly.".
format(raw_core_name, new_dll_core_path, core_path,
raw_core_name))
run_cmd('mklink /H {} {}'.format(new_dll_core_path, core_path))
# core_avx or core_noavx with lib suffix
assert os.path.exists(new_dll_core_path)
return raw_core_name[:-4] + ".lib"
# create symbol link else:
new_lib_core_path = _get_lib_core_path()
# create symbol link on mac
if not os.path.exists(new_lib_core_path): if not os.path.exists(new_lib_core_path):
try: try:
os.symlink(core_path, new_lib_core_path) os.symlink(core_path, new_lib_core_path)
...@@ -1054,12 +1084,12 @@ def check_abi_compatibility(compiler, verbose=False): ...@@ -1054,12 +1084,12 @@ def check_abi_compatibility(compiler, verbose=False):
if os.environ.get('PADDLE_SKIP_CHECK_ABI') in ['True', 'true', '1']: if os.environ.get('PADDLE_SKIP_CHECK_ABI') in ['True', 'true', '1']:
return True return True
which = 'where' if IS_WINDOWS else 'which' if not IS_WINDOWS:
cmd_out = subprocess.check_output( cmd_out = subprocess.check_output(
[which, compiler], stderr=subprocess.STDOUT) ['which', compiler], stderr=subprocess.STDOUT)
compiler_path = os.path.realpath(cmd_out.decode() compiler_path = os.path.realpath(cmd_out.decode()
if six.PY3 else cmd_out).strip() if six.PY3 else cmd_out).strip()
# step 1. if not found any suitable compiler, raise error # if not found any suitable compiler, raise warning
if not any(name in compiler_path if not any(name in compiler_path
for name in _expected_compiler_current_platform()): for name in _expected_compiler_current_platform()):
warnings.warn( warnings.warn(
......
...@@ -255,11 +255,15 @@ paddle_bins = '' ...@@ -255,11 +255,15 @@ paddle_bins = ''
if not '${WIN32}': if not '${WIN32}':
paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'] paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle']
package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + ('.so' if os.name != 'nt' else '.pyd')]}
if os.name != 'nt':
package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + '.so']}
else:
package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + '.pyd', '${FLUID_CORE_NAME}' + '.lib']}
if '${HAS_NOAVX_CORE}' == 'ON': if '${HAS_NOAVX_CORE}' == 'ON':
package_data['paddle.fluid'] += ['core_noavx' + ('.so' if os.name != 'nt' else '.pyd')] package_data['paddle.fluid'] += ['core_noavx' + ('.so' if os.name != 'nt' else '.pyd')]
package_dir={ package_dir={
'': '${PADDLE_BINARY_DIR}/python', '': '${PADDLE_BINARY_DIR}/python',
# The paddle.fluid.proto will be generated while compiling. # The paddle.fluid.proto will be generated while compiling.
...@@ -353,14 +357,6 @@ if '${WITH_XPU}' == 'OFF' and '${XPU_SDK_ROOT}' != '': ...@@ -353,14 +357,6 @@ if '${WITH_XPU}' == 'OFF' and '${XPU_SDK_ROOT}' != '':
package_data['paddle.libs']+=['libxpurt.so'] package_data['paddle.libs']+=['libxpurt.so']
### New custom op extension mechanism related ###
# copy paddle_custom_op.lib/paddle_custom_op.dll to libs on Windows
if os.name == 'nt':
shutil.copy('${PADDLE_CUSTOM_OP_IMPORT_LIB}', libs_path)
shutil.copy('${PADDLE_CUSTOM_OP_SHARED_LIB}', libs_path)
package_data['paddle.libs'] += ['paddle_custom_op.lib', 'paddle_custom_op.dll']
# remove unused paddle/libs/__init__.py # remove unused paddle/libs/__init__.py
if os.path.isfile(libs_path+'/__init__.py'): if os.path.isfile(libs_path+'/__init__.py'):
os.remove(libs_path+'/__init__.py') os.remove(libs_path+'/__init__.py')
......
...@@ -436,6 +436,169 @@ CPU_PARALLEL_JOB = [ ...@@ -436,6 +436,169 @@ CPU_PARALLEL_JOB = [
'assign_op_test', 'assign_op_test',
'allocator_facade_frac_flags_test', 'allocator_facade_frac_flags_test',
'aes_cipher_test', 'aes_cipher_test',
'test_dist_sparse_tensor_load_adagrad',
'test_dist_mnist_fp16_allreduce',
'test_dist_mnist_gradient_merge',
'test_dist_allreduce_op',
'test_hdfs3',
'test_parallel_dygraph_se_resnext',
'test_dist_fleet_ps9',
'test_dist_fleet_infer',
'test_dist_se_resnext_sync',
'test_dist_oneps',
'test_dist_sparse_load_ps1',
'test_dist_mnist_batch_merge',
'test_dist_fleet_ctr',
'test_dist_fleet_ps10',
'test_parallel_dygraph_transformer',
'test_dist_mnist_fleetapi',
'test_dist_sparse_tensor_load_adam',
'test_dist_fleet_ps4',
'test_dist_fleet_heter_program',
'test_parallel_dygraph_sparse_embedding_over_height',
'test_hdfs2',
'test_dist_sharding_save',
'test_dist_fleet_ps_gpu_ctr',
'test_dist_mnist_backward_deps',
'test_dist_fleet_heter_base',
'test_dist_sparse_tensor_load_sgd',
'test_new_group',
'test_dist_mnist_with_program',
'test_dist_mnist_pg',
'test_dist_sparse_tensor_load_rmsprop',
'test_auto_checkpoint2',
'test_dist_sparse_tensor_load_ftrl',
'test_dist_fleet_ps6',
'test_dist_mnist_fleet_save',
'test_auto_checkpoint1',
'test_dist_fleet_a_sync_optimizer_sync',
'test_dist_fleet_ps3',
'test_dist_se_resnext_nccl',
'test_parallel_dygraph_mnist',
'test_auto_checkpoint_multiple',
'test_dist_fleet_a_sync_optimizer_auto_async',
'test_pipeline',
'test_dist_fleet_ps8',
'test_dist_fleet_sparse_embedding_ctr',
'test_dist_se_resnext_dgc',
'test_dist_fleet_ps7',
'test_dist_fleet_decay',
'test_dist_fleet_a_sync_optimizer_auto_geo',
'test_dist_fleet_geo',
'test_parallel_dygraph_dataparallel',
'test_hdfs1',
'test_dist_mnist_dgc_nccl',
'test_dist_fleet_ctr2',
'test_parallel_dygraph_unused_variables',
'test_dist_mnist_multi_comm',
'test_dist_sparse_tensor_load_momentum',
'test_gen_nccl_id_op',
'test_parallel_dygraph_sparse_embedding',
'test_dist_mnist_ring_allreduce',
'test_fleet_launch_async',
'test_dist_fleet_a_sync_optimizer_geo',
'test_parallel_dygraph_control_flow',
'test_auto_checkpoint',
'test_fleet_pipeline_meta_optimizer',
'test_dist_fleet_heter_ctr',
'test_fleet_graph_execution_meta_optimizer',
'test_fleet_run_random_port',
'test_dist_fleet_ps5',
'test_dist_fleet_a_sync_optimizer_auto',
'test_dist_lookup_sparse_table_fuse_ops',
'test_dist_fleet_a_sync_optimizer_async',
'test_c_comm_init_op',
'test_fleet_launch_nproc',
'test_dist_fleet_simnet',
'test_auto_checkpoint_dist_basic',
'test_fleet_launch_cloud',
'test_dist_fleet_ps',
'test_dist_op',
'test_dist_sparse_load_ps0',
'test_auto_checkpoint3',
'test_dist_fleet_ps2',
'test_dist_fleet_grad_clip',
'test_custom_concat',
'test_analyzer_transformer_fuse',
'test_analyzer_seq_pool1_fuse_statis',
'test_fc_lstm_fuse_pass_cc',
'test_layer_norm_fuse_pass',
'test_fc_gru_fuse_pass_cc',
'test_analyzer_save_model',
'test_fleet_ps',
'test_analyzer_multi_model_prediction',
'test_fleet_base_3',
'test_fleet_base_2',
'test_ascend_trigger',
'test_fleet_amp_meta_optimizer',
'test_fleetrun',
'test_check_abi',
'dense_table_test',
'test_adaptive_pool2d_convert_global_pass',
'test_fleet_recompute_meta_optimizer',
'test_fleet_fp16_allreduce_meta_optimizer',
'test_post_training_quantization_lstm_model',
'test_fleet_metric',
'test_fleet_gradient_merge_meta_optimizer',
'test_fleet_sharding_meta_optimizer',
'test_listen_and_serv_op',
'test_analyzer_zerocopytensor_tensor',
'test_conv_bn_fuse_pass_cc',
'test_collective_optimizer',
'test_bf16_utils',
'test_analyzer_seq_pool1_compare_determine',
'test_avoid_twice_initialization',
'test_callback_early_stop',
'test_fleet_distributed_strategy',
'test_launch_coverage',
'test_sgd_op_bf16',
'test_model_cast_to_bf16',
'test_hybrid_parallel_topology',
'barrier_table_test',
'test_check_error',
'test_fleet_lamb_meta_optimizer',
'test_fleet_rolemaker_2',
'test_distributed_strategy',
'test_rnn_cudnn_params_packing',
'test_communicator_async',
'brpc_utils_test',
'test_analyzer_capi_pd_tensor',
'test_recv_save_op',
'heter_listen_and_server_test',
'test_analyzer_capi_ner',
'test_unsqueeze2_eltwise_fuse_pass',
'test_dgc_optimizer',
'test_fleet_cc',
'test_repeated_fc_relu_fuse_pass_cc',
'heter_server_test',
'test_static_save_load_large',
'graph_node_test',
'test_custom_conj',
'test_fleet_private_function',
'test_fake_init_op',
'brpc_service_sparse_sgd_test',
'test_tf32_cudnn',
'test_communicator_geo',
'test_dispatch_jit',
'test_layer_norm_fuse_pass_cc',
'test_fleet_dgc_meta_optimizer',
'test_fc_fuse_pass_cc',
'test_communicator_sync',
'test_analyzer_capi',
'test_fleet_lars_meta_optimizer',
'test_communicator_half_async',
'test_fleet_localsgd_meta_optimizer',
'test_fleet_amp_init',
'test_fleet_checkpoint',
'test_analyzer_seq_pool1_fuse_compare_zero_copy',
'test_lookup_table_bf16_op',
'test_fleet_meta_optimizer_base',
'table_test',
'test_fleet_rolemaker_new',
'test_fleet_graph_executor',
'test_multi_out_jit',
'test_fleet_utils',
'brpc_service_dense_sgd_test',
] ]
# It runs 4 jobs each time. If it fails due to insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, # It runs 4 jobs each time. If it fails due to insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED,
...@@ -477,6 +640,50 @@ TETRAD_PARALLEL_JOB = [ ...@@ -477,6 +640,50 @@ TETRAD_PARALLEL_JOB = [
'tensor_test', 'tensor_test',
'test_repeated_fc_relu_fuse_pass_cc', 'test_repeated_fc_relu_fuse_pass_cc',
'test_mkldnn_caching', 'test_mkldnn_caching',
'test_analyzer_seq_pool1',
'test_analyzer_ocr',
'test_analyzer_seq_conv1',
'test_analyzer_small_dam',
'test_analyzer_mobilenet_depthwise_conv',
'test_analyzer_pyramid_dnn',
'test_analyzer_text_classification',
'test_analyzer_rnn2',
'test_analyzer_transformer',
'test_analyzer_resnet50',
'test_analyzer_ner',
'test_analyzer_lac',
'test_analyzer_transformer_profile',
'test_analyzer_mobilenet_transpose',
'test_analyzer_rnn1',
'test_analyzer_seq_pool1_profile',
'test_analyzer_paddletensor_tensor',
'test_analyzer_bert',
'test_analyzer_googlenet',
'zero_copy_tensor_test',
'custom_tensor_test',
'test_fleet_base',
'test_imperative_container_layerdict',
'test_complex_simplenet',
'test_tensor_register_hook',
'test_set_value_op',
'test_tensor_type_promotion',
'test_view_op_reuse_allocation',
'test_complex_grad_accumulated',
'test_sequential',
'test_sequential',
'test_imperative_layers',
'test_dgc_momentum_op',
'test_memcpy_op',
'test_dgc_op',
'test_modelaverage',
'test_lookahead',
'test_callback_visualdl',
'test_new_group_api',
'test_collective_split_embedding_none_divisible',
'test_collective_wait',
'test_collective_split_row_linear',
'test_collective_split_col_linear',
'test_collective_split_embedding',
] ]
# It runs 2 jobs each time. If it fails due to insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED, # It runs 2 jobs each time. If it fails due to insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED,
...@@ -611,7 +818,6 @@ TWO_PARALLEL_JOB = [ ...@@ -611,7 +818,6 @@ TWO_PARALLEL_JOB = [
'test_adam_op_multi_thread', 'test_adam_op_multi_thread',
'test_adamax_op', 'test_adamax_op',
'test_while_loop_op', 'test_while_loop_op',
'test_affine_grid_function',
'test_transpose_flatten_concat_fuse_pass', 'test_transpose_flatten_concat_fuse_pass',
'test_trace_op', 'test_trace_op',
'test_backward', 'test_backward',
...@@ -663,7 +869,6 @@ TWO_PARALLEL_JOB = [ ...@@ -663,7 +869,6 @@ TWO_PARALLEL_JOB = [
'test_gather_op', 'test_gather_op',
'test_partial_concat_op', 'test_partial_concat_op',
'test_gaussian_random_op', 'test_gaussian_random_op',
'test_paddle_imperative_double_grad',
'test_generate_proposals_v2_op', 'test_generate_proposals_v2_op',
'test_pad_constant_like', 'test_pad_constant_like',
'test_grid_sample_function', 'test_grid_sample_function',
...@@ -879,6 +1084,11 @@ TWO_PARALLEL_JOB = [ ...@@ -879,6 +1084,11 @@ TWO_PARALLEL_JOB = [
'test_imperative_load_static_param', 'test_imperative_load_static_param',
'test_fuse_bn_add_act_pass', 'test_fuse_bn_add_act_pass',
'test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass', 'test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass',
'test_quantize_transpiler_v2',
'paddle_infer_api_test',
'test_analyzer_ernie',
'lite_resnet50_test',
'lite_mul_model_test',
] ]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册