未验证 提交 3ba8c48a 编写于 作者: Z Zhou Wei 提交者: GitHub

[CHERRY-PICK2.1]Remove paddle_custom_op dynamic libraries, and link to...

 [CHERRY-PICK2.1]Remove paddle_custom_op dynamic libraries, and link to FLUID_CORE on windows (#32583) (#32769)

* Remove paddle_custom_op dynamic libraries, change link to FLUID_CORE on windows, and check copy_to

* fix CI
上级 70e0e3d5
......@@ -369,36 +369,3 @@ cc_library(paddle_framework DEPS ${FLUID_FRAMEWORK_MODULES})
if(WITH_TESTING AND TEST selected_rows_test)
set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120)
endif()
##### 2.0 New custom op extension mechanism related #####
# if not deps `layer`, will cause: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
if (WIN32)
set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)
set(PADDLE_CUSTOM_OP_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/custom_operator.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_tensor.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_op_meta_info.cc
${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc)
set(PADDLE_CUSTOM_OP_SRCS ${PADDLE_CUSTOM_OP_SRCS} PARENT_SCOPE)
cc_library(paddle_custom_op_shared
SHARED SRCS ${PADDLE_CUSTOM_OP_SRCS} DEPS ${PADDLE_CUSTOM_OP_MODULES})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op)
target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})
if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
set(paddle_custom_op_lib_path ${CMAKE_CURRENT_BINARY_DIR})
else()
set(paddle_custom_op_lib_path ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE})
endif()
set(PADDLE_CUSTOM_OP_IMPORT_LIB
${paddle_custom_op_lib_path}/paddle_custom_op.lib
CACHE INTERNAL "Paddle custom op import lib")
set(PADDLE_CUSTOM_OP_SHARED_LIB
${paddle_custom_op_lib_path}/paddle_custom_op.dll
CACHE INTERNAL "Paddle custom op dll")
endif()
......@@ -54,14 +54,14 @@ wmic process where name="python.exe" call terminate 2>NUL
rem ------initialize common variable------
if not defined GENERATOR set GENERATOR="Visual Studio 15 2017 Win64"
if not defined BRANCH set BRANCH=develop
if not defined WITH_TENSORRT set WITH_TENSORRT=ON
if not defined WITH_TENSORRT set WITH_TENSORRT=ON
if not defined TENSORRT_ROOT set TENSORRT_ROOT=D:/TensorRT
if not defined CUDA_ARCH_NAME set CUDA_ARCH_NAME=Auto
if not defined WITH_GPU set WITH_GPU=ON
if not defined WITH_MKL set WITH_MKL=ON
if not defined WITH_AVX set WITH_AVX=ON
if not defined WITH_TESTING set WITH_TESTING=ON
if not defined MSVC_STATIC_CRT set MSVC_STATIC_CRT=OFF
if not defined MSVC_STATIC_CRT set MSVC_STATIC_CRT=ON
if not defined WITH_PYTHON set WITH_PYTHON=ON
if not defined ON_INFER set ON_INFER=ON
if not defined WITH_INFERENCE_API_TEST set WITH_INFERENCE_API_TEST=ON
......@@ -75,6 +75,7 @@ if not defined LOG_LEVEL set LOG_LEVEL=normal
if not defined PRECISION_TEST set PRECISION_TEST=OFF
if not defined NIGHTLY_MODE set PRECISION_TEST=OFF
if not defined retry_times set retry_times=2
if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37
rem -------set cache build directory-----------
rmdir build\python /s/q
......@@ -83,9 +84,6 @@ rmdir build\paddle_inference_install_dir /s/q
rmdir build\paddle_inference_c_install_dir /s/q
del build\CMakeCache.txt
: set CI_SKIP_CPP_TEST if only *.py changed
git diff --name-only %BRANCH% | findstr /V "\.py" || set CI_SKIP_CPP_TEST=ON
if "%WITH_CACHE%"=="OFF" (
rmdir build /s/q
goto :mkbuild
......@@ -135,58 +133,6 @@ dir .
dir %cache_dir%
dir paddle\fluid\pybind\Release
rem ------initialize the python environment------
if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37
set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe
set PATH=%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH%
rem ToDo: virtual environment can't be deleted safely, some process not exit when task is canceled
rem Now use system python environment temporarily
rem %PYTHON_EXECUTABLE% -m pip install virtualenv
rem %PYTHON_EXECUTABLE% -m virtualenv paddle_winci
rem call paddle_winci\Scripts\activate.bat
rem ------pre install python requirement----------
where python
where pip
pip install wheel --user
pip install -r %work_dir%\python\requirements.txt --user
if %ERRORLEVEL% NEQ 0 (
echo pip install requirements.txt failed!
exit /b 7
)
rem ------pre install clcache and init config----------
rem pip install clcache --user
pip uninstall -y clcache
:: set USE_CLCACHE to enable clcache
rem set USE_CLCACHE=1
:: In some scenarios, CLCACHE_HARDLINK can save one file copy.
rem set CLCACHE_HARDLINK=1
:: If it takes more than 1000s to obtain the right to use the cache, an error will be reported
rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
:: set maximum cache size to 20G
rem clcache.exe -M 21474836480
:: install ninja if GENERATOR is Ninja
if %GENERATOR% == "Ninja" (
pip install ninja
if %errorlevel% NEQ 0 (
echo pip install ninja failed!
exit /b 7
)
)
rem ------show summary of current environment----------
cmake --version
if "%WITH_GPU%"=="ON" (
nvcc --version
nvidia-smi
)
::python %work_dir%\tools\summary_env.py
::%cache_dir%\tools\busybox64.exe bash %work_dir%\tools\get_cpu_info.sh
goto :CASE_%1
echo "Usage: paddle_build.bat [OPTION]"
......@@ -266,8 +212,10 @@ rem "Other configurations are added here"
rem :CASE_wincheck_others
rem call ...
rem ---------------------------------------------------------------------------------------------
:cmake
@ECHO OFF
echo ========================================
echo Step 1. Cmake ...
echo ========================================
......@@ -281,12 +229,52 @@ set PATH=C:\Program Files (x86)\Windows Kits\10\bin\10.0.17763.0\x64;%PATH%
for /F %%# in ('wmic os get localdatetime^|findstr 20') do set start=%%#
set start=%start:~4,10%
@ECHO ON
if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0
if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.2
set PATH=%TENSORRT_ROOT:/=\%\lib;%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH%
rem ------set third_party cache dir------
rem install ninja if GENERATOR is Ninja
if %GENERATOR% == "Ninja" (
    rem Ninja is not bundled with the toolchain, so fetch it through pip.
    pip install ninja
    rem A percent-style errorlevel reference would be substituted when this whole
    rem parenthesised block is parsed, before pip has run. The errorlevel form
    rem below is evaluated at run time and is true for any exit code of 1 or more.
    if errorlevel 1 (
        echo pip install ninja failed!
        exit /b 7
    )
)
rem ------show summary of current GPU environment----------
cmake --version
if "%WITH_GPU%"=="ON" (
nvcc --version
nvidia-smi
)
rem ------initialize the python environment------
set PYTHON_EXECUTABLE=%PYTHON_ROOT%\python.exe
set PATH=%PYTHON_ROOT%;%PYTHON_ROOT%\Scripts;%PATH%
rem NOTE to reviewer: WITH_PYTHON defaults to the unquoted value ON, while the
rem comparison below is against the quoted literal OFF. Please confirm which
rem callers are expected to take this branch.
if %WITH_PYTHON% == "OFF" (
    where python
    where pip
    pip install wheel --user
    pip install -r %work_dir%\python\requirements.txt --user
    rem A percent-style errorlevel reference would be substituted when this whole
    rem parenthesised block is parsed, before pip has run. The errorlevel form
    rem below is evaluated at run time and is true for any exit code of 1 or more.
    if errorlevel 1 (
        echo pip install requirements.txt failed!
        exit /b 7
    )
)
rem ------pre install clcache and init config----------
rem pip install clcache --user
pip uninstall -y clcache
:: set USE_CLCACHE to enable clcache
rem set USE_CLCACHE=1
:: In some scenarios, CLCACHE_HARDLINK can save one file copy.
rem set CLCACHE_HARDLINK=1
:: If it takes more than 1000s to obtain the right to use the cache, an error will be reported
rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
:: set maximum cache size to 20G
rem clcache.exe -M 21474836480
rem ------set third_party cache dir------
: clear third party cache every once in a while
for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%#
set day_now=%datetime:~6,2%
......@@ -500,6 +488,10 @@ echo ========================================
echo Step 4. Running unit tests ...
echo ========================================
: set CI_SKIP_CPP_TEST if only *.py changed
git diff --name-only %BRANCH% | findstr /V "\.py" || set CI_SKIP_CPP_TEST=ON
pip install -r %work_dir%\python\unittest_py\requirements.txt --user
if %ERRORLEVEL% NEQ 0 (
echo pip install unittest requirements.txt failed!
......
......@@ -43,9 +43,20 @@ set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/)
if(WIN32)
  # Python would use the .pyd by default under Windows series platform
  set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.pyd)
  # Destination for the linker file of paddle_pybind, placed beside the .pyd.
  set(FLUID_CORE_LIB ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.lib)

  # Copy both the module and its linker file into the python package tree.
  # ${CMAKE_COMMAND} is used instead of a bare `cmake` so the copy is done by
  # the same CMake that configured the build, independent of PATH.
  add_custom_command(OUTPUT ${FLUID_CORE}
    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_LINKER_FILE:paddle_pybind> ${FLUID_CORE_LIB}
    DEPENDS paddle_pybind
    VERBATIM)

  set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.pyd)
else()
  set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.so)

  # Only the shared object itself is copied on non-Windows platforms.
  add_custom_command(OUTPUT ${FLUID_CORE}
    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
    DEPENDS paddle_pybind
    VERBATIM)

  set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.so)
endif()
......@@ -68,9 +79,6 @@ if(HAS_NOAVX_CORE AND EXISTS "${NOAVX_CORE_FILE}")
list(APPEND FLUID_CORE_DEPS ${FLUID_NOAVX_CORE})
endif()
add_custom_command(OUTPUT ${FLUID_CORE}
COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
DEPENDS paddle_pybind)
add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE_DEPS})
IF(WIN32)
......@@ -84,6 +92,7 @@ ELSE(WIN32)
COMMAND touch stub.cc
COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
COMMENT "Packing whl packages------>>>"
DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto profiler_py_proto ${PY_FILES})
ENDIF()
......
......@@ -24,6 +24,6 @@ def check_import_scipy(OsName):
if 'DLL load failed' in print_info:
raise ImportError(
print_info +
"\nplease download visual C++ Redistributable for vs 2015, https://www.microsoft.com/en-us/download/details.aspx?id=48145"
"\nplease download Visual C++ Redistributable from https://support.microsoft.com/en-us/topic/the-latest-supported-visual-c-downloads-2647da03-1eea-4433-9aff-95f26a218cc0"
)
return
......@@ -37,7 +37,10 @@ if os.path.exists(current_path + os.sep + 'core_noavx.' + core_suffix):
try:
if os.name == 'nt':
third_lib_path = current_path + os.sep + '..' + os.sep + 'libs'
os.environ['path'] = third_lib_path + ';' + os.environ['path']
# Will load shared library from 'path' on windows
os.environ[
'path'] = current_path + ';' + third_lib_path + ';' + os.environ[
'path']
sys.path.insert(0, third_lib_path)
# Note: from python3.8, PATH will not take effect
# https://github.com/python/cpython/pull/12302
......@@ -298,7 +301,7 @@ if avx_supported():
"WARNING: AVX is supported on local machine, but you have installed "
"paddlepaddle without avx core. Hence, no_avx core which has worse "
"preformance will be imported.\nYou could reinstall paddlepaddle by "
"'python -m pip install -U paddlepaddle-gpu[==version]' or rebuild "
"'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' or rebuild "
"paddlepaddle WITH_AVX=ON to get better performance.\n"
"The original error is: %s\n" % cpt.get_exception_message(e))
load_noavx = True
......@@ -350,12 +353,19 @@ if load_noavx:
sys.stderr.write(
'Error: Can not import noavx core while this file exists: ' +
current_path + os.sep + 'core_noavx.' + core_suffix + '\n')
elif avx_supported():
sys.stderr.write(
"Error: AVX is support on your machine, but you have installed "
"paddlepaddle without avx core, you should reinstall paddlepaddle by "
"'python -m pip install --force-reinstall paddlepaddle-gpu[==version]\n"
)
else:
sys.stderr.write(
"Error: AVX is not support on your machine, but you have installed "
"paddlepaddle with avx core, you should reinstall paddlepaddle by "
"'python -m pip install -U paddlepaddle-gpu[==version] -f "
"https://paddlepaddle.org.cn/whl/stable_noavx.html'\n")
"paddlepaddle without no_avx core, you should reinstall paddlepaddle by "
"'python -m pip install --force-reinstall paddlepaddle-gpu[==version] -f "
"https://paddlepaddle.org.cn/whl/mkl/stable/noavx.html or "
"https://paddlepaddle.org.cn/whl/openblas/stable/noavx.html\n")
raise e
......
# New custom OP can support Windows/Linux now
if(WITH_GPU OR APPLE)
# GPU custom op tests: compile both .cc and .cu file
# New custom OP can support Windows/Linux/Mac now
if(WITH_GPU OR APPLE)
py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
py_test(test_custom_relu_model SRCS test_custom_relu_model.py)
......
......@@ -45,8 +45,12 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
int grid = (numel + block - 1) / block;
PD_DISPATCH_FLOATING_AND_HALF_TYPES(
x.type(), "relu_cuda_forward_kernel", ([&] {
auto cpu_input = x.copy_to<data_t>(paddle::PlaceType::kCPU);
auto gpu_input = cpu_input.copy_to<data_t>(paddle::PlaceType::kGPU);
relu_cuda_forward_kernel<data_t><<<grid, block, 0, x.stream()>>>(
x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel);
gpu_input.data<data_t>(),
out.mutable_data<data_t>(x.place()),
numel);
}));
return {out};
......
......@@ -64,14 +64,29 @@ class TestCheckCompiler(TestABIBase):
# clear environ
self.del_environ()
compiler = 'python' # fake wrong compiler
with warnings.catch_warnings(record=True) as error:
flag = utils.check_abi_compatibility(compiler, verbose=True)
# check return False
self.assertFalse(flag)
# check Compiler Compatibility WARNING
self.assertTrue(len(error) == 1)
self.assertTrue(
"Compiler Compatibility WARNING" in str(error[0].message))
if not utils.IS_WINDOWS:
with warnings.catch_warnings(record=True) as error:
flag = utils.check_abi_compatibility(compiler, verbose=True)
# check return False
self.assertFalse(flag)
# check Compiler Compatibility WARNING
self.assertTrue(len(error) == 1)
self.assertTrue(
"Compiler Compatibility WARNING" in str(error[0].message))
def test_exception_windows(self):
# clear environ
self.del_environ()
compiler = 'fake compiler' # fake command
if utils.IS_WINDOWS:
with warnings.catch_warnings(record=True) as error:
flag = utils.check_abi_compatibility(compiler, verbose=True)
# check return False
self.assertFalse(flag)
# check ABI Compatibility WARNING
self.assertTrue(len(error) == 1)
self.assertTrue("Failed to check compiler version for" in
str(error[0].message))
def test_exception_linux(self):
# clear environ
......
......@@ -105,12 +105,12 @@ class TestJITLoad(unittest.TestCase):
in str(e))
if IS_WINDOWS:
self.assertTrue(
r"python\paddle\fluid\tests\custom_op\custom_relu_op.cc:47"
in str(e))
r"python\paddle\fluid\tests\custom_op\custom_relu_op.cc" in
str(e))
else:
self.assertTrue(
"python/paddle/fluid/tests/custom_op/custom_relu_op.cc:47"
in str(e))
"python/paddle/fluid/tests/custom_op/custom_relu_op.cc" in
str(e))
self.assertTrue(caught_exception)
caught_exception = False
......@@ -126,7 +126,7 @@ class TestJITLoad(unittest.TestCase):
"function \"relu_cuda_forward_kernel\" is not implemented for data type `int32_t`"
in str(e))
self.assertTrue(
"python/paddle/fluid/tests/custom_op/custom_relu_op.cu:50" in
"python/paddle/fluid/tests/custom_op/custom_relu_op.cu" in
str(e))
self.assertTrue(caught_exception)
......
......@@ -26,7 +26,7 @@ from .extension_utils import find_cuda_home, find_rocm_home, normalize_extension
from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags
from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile
from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from
from .extension_utils import clean_object_if_change_cflags, _reset_so_rpath
from .extension_utils import clean_object_if_change_cflags, _reset_so_rpath, _get_fluid_path
from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat
from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS, MSVC_COMPILE_FLAGS
......@@ -69,7 +69,7 @@ def setup(**attr):
For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2,
then the version of user's local machine should satisfy GCC >= 8.2.
For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of
PaddlePaddle (Visual Studio 2015 update3).
PaddlePaddle (Visual Studio 2017).
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility.
......@@ -79,7 +79,7 @@ def setup(**attr):
2. On Linux platform, we recommend to use GCC 8.2 as soft linking condidate of ``/usr/bin/cc`` .
Then, Use ``which cc`` to ensure location of ``cc`` and using ``cc --version`` to ensure linking
GCC version.
3. On Windows platform, we recommend to install `` Visual Studio`` (>=2015 update3).
3. On Windows platform, we recommend to install `` Visual Studio`` (>=2017).
Compared with Just-In-Time ``load`` interface, it only compiles once by executing
......@@ -611,7 +611,7 @@ class BuildExtension(build_ext, object):
msg = (
'It seems that the VC environment is activated but DISTUTILS_USE_SDK is not set.'
'This may lead to multiple activations of the VC env.'
'Please set `DISTUTILS_USE_SDK=1` and try again.')
'Please run `set DISTUTILS_USE_SDK=1` and try again.')
raise UserWarning(msg)
def _record_op_info(self):
......@@ -724,7 +724,7 @@ def load(name,
processes under a individual subprocess. It does not require CMake or Ninja
environment. On Linux platform, it requires GCC compiler whose version is
greater than 5.4 and it should be soft linked to ``/usr/bin/cc`` . On Windows
platform, it requires Visual Studio whose version is greater than 2015 update3.
platform, it requires Visual Studio whose version is greater than 2017.
On MacOS, clang++ is requited. In addition, if compiling Operators supporting
GPU device, please make sure ``nvcc`` compiler is installed in local environment.
......@@ -735,7 +735,7 @@ def load(name,
For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2,
then the version of user's local machine should satisfy GCC >= 8.2.
For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of
PaddlePaddle (Visual Studio 2015 update3).
PaddlePaddle (Visual Studio 2017).
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility.
......@@ -749,7 +749,7 @@ def load(name,
2. On Linux platform, we recommend to use GCC 8.2 as soft linking condidate of ``/usr/bin/cc`` .
Then, Use ``which cc`` to ensure location of ``cc`` and using ``cc --version`` to ensure linking
GCC version.
3. On Windows platform, we recommend to install `` Visual Studio`` (>=2015 update3).
3. On Windows platform, we recommend to install `` Visual Studio`` (>=2017).
**A simple example:**
......@@ -802,9 +802,6 @@ def load(name,
# ensure to use abs path
build_directory = os.path.abspath(build_directory)
# Will load shared library from 'path' on windows
if IS_WINDOWS:
os.environ['path'] = build_directory + ';' + os.environ['path']
log_v("build_directory: {}".format(build_directory), verbose)
......@@ -827,6 +824,7 @@ def load(name,
# write setup.py file and compile it
build_base_dir = os.path.join(build_directory, name)
_write_setup_file(name, sources, file_path, build_base_dir,
extra_include_paths, extra_cxx_cflags, extra_cuda_cflags,
extra_ldflags, verbose)
......
......@@ -55,7 +55,7 @@ CLANG_LINK_FLAGS = [
'-dynamiclib', '-undefined', 'dynamic_lookup', '-arch', 'x86_64'
]
MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib']
MSVC_LINK_FLAGS = ['/MACHINE:X64']
COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU']
......@@ -371,10 +371,11 @@ def _get_core_name():
Return pybind DSO module name.
"""
import paddle
if paddle.fluid.core.load_noavx:
return 'core_noavx.so'
ext_name = '.pyd' if IS_WINDOWS else '.so'
if not paddle.fluid.core.load_noavx:
return 'core_avx' + ext_name
else:
return 'core_avx.so'
return 'core_noavx' + ext_name
def _get_lib_core_path():
......@@ -386,6 +387,15 @@ def _get_lib_core_path():
return os.path.join(_get_fluid_path(), lib_core_name)
def _get_dll_core_path():
    """
    Return the path of `paddle_pybind.dll` under the directory given by
    `_get_fluid_path()`. Used on Windows only.
    """
    # The file name is fixed; it does not depend on the avx/noavx core name.
    dll_core_name = "paddle_pybind.dll"
    return os.path.join(_get_fluid_path(), dll_core_name)
def _reset_so_rpath(so_path):
"""
NOTE(Aurelius84): Runtime path of core_(no)avx.so is modified into `@loader_path/../libs`
......@@ -435,9 +445,12 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
# append link flags
extra_link_args = kwargs.get('extra_link_args', [])
extra_link_args.extend(MSVC_LINK_FLAGS)
lib_core_name = create_sym_link_if_not_exist()
extra_link_args.append('{}'.format(lib_core_name))
if use_cuda:
extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
kwargs['extra_link_args'] = extra_link_args
else:
########################### Linux Platform ###########################
extra_link_args = kwargs.get('extra_link_args', [])
......@@ -481,24 +494,41 @@ def create_sym_link_if_not_exist():
"""
Create soft symbol link of `core_avx.so` or `core_noavx.so`
"""
assert OS_NAME.startswith('darwin')
assert OS_NAME.startswith('darwin') or IS_WINDOWS
raw_core_name = _get_core_name()
core_path = os.path.join(_get_fluid_path(), raw_core_name)
new_lib_core_path = _get_lib_core_path()
if IS_WINDOWS:
new_dll_core_path = _get_dll_core_path()
# create symbol link on windows
if not os.path.exists(new_dll_core_path):
try:
os.symlink(core_path, new_dll_core_path)
except Exception:
warnings.warn(
"Failed to create soft symbol link for {}.\n You can run prompt as administrator and execute the "
"following command manually: `mklink {} {}`. Now it will create hard link for {} trickly.".
format(raw_core_name, new_dll_core_path, core_path,
raw_core_name))
run_cmd('mklink /H {} {}'.format(new_dll_core_path, core_path))
# core_avx or core_noavx with lib suffix
assert os.path.exists(new_dll_core_path)
return raw_core_name[:-4] + ".lib"
# create symbol link
if not os.path.exists(new_lib_core_path):
try:
os.symlink(core_path, new_lib_core_path)
assert os.path.exists(new_lib_core_path)
except Exception:
raise RuntimeError(
"Failed to create soft symbol link for {}.\n Please execute the following command manually: `ln -s {} {}`".
format(raw_core_name, core_path, new_lib_core_path))
else:
new_lib_core_path = _get_lib_core_path()
# create symbol link on mac
if not os.path.exists(new_lib_core_path):
try:
os.symlink(core_path, new_lib_core_path)
assert os.path.exists(new_lib_core_path)
except Exception:
raise RuntimeError(
"Failed to create soft symbol link for {}.\n Please execute the following command manually: `ln -s {} {}`".
format(raw_core_name, core_path, new_lib_core_path))
# core_avx or core_noavx without suffix
return raw_core_name[:-3]
# core_avx or core_noavx without suffix
return raw_core_name[:-3]
def find_cuda_home():
......@@ -1054,20 +1084,20 @@ def check_abi_compatibility(compiler, verbose=False):
if os.environ.get('PADDLE_SKIP_CHECK_ABI') in ['True', 'true', '1']:
return True
which = 'where' if IS_WINDOWS else 'which'
cmd_out = subprocess.check_output(
[which, compiler], stderr=subprocess.STDOUT)
compiler_path = os.path.realpath(cmd_out.decode()
if six.PY3 else cmd_out).strip()
# step 1. if not found any suitable compiler, raise error
if not any(name in compiler_path
for name in _expected_compiler_current_platform()):
warnings.warn(
WRONG_COMPILER_WARNING.format(
user_compiler=compiler,
paddle_compiler=_expected_compiler_current_platform()[0],
platform=OS_NAME))
return False
if not IS_WINDOWS:
cmd_out = subprocess.check_output(
['which', compiler], stderr=subprocess.STDOUT)
compiler_path = os.path.realpath(cmd_out.decode()
if six.PY3 else cmd_out).strip()
# if not found any suitable compiler, raise warning
if not any(name in compiler_path
for name in _expected_compiler_current_platform()):
warnings.warn(
WRONG_COMPILER_WARNING.format(
user_compiler=compiler,
paddle_compiler=_expected_compiler_current_platform()[0],
platform=OS_NAME))
return False
version = (0, 0, 0)
# clang++ have no ABI compatibility problem
......
......@@ -255,11 +255,15 @@ paddle_bins = ''
if not '${WIN32}':
paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle']
package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + ('.so' if os.name != 'nt' else '.pyd')]}
if os.name != 'nt':
package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + '.so']}
else:
package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + '.pyd', '${FLUID_CORE_NAME}' + '.lib']}
if '${HAS_NOAVX_CORE}' == 'ON':
package_data['paddle.fluid'] += ['core_noavx' + ('.so' if os.name != 'nt' else '.pyd')]
package_dir={
'': '${PADDLE_BINARY_DIR}/python',
# The paddle.fluid.proto will be generated while compiling.
......@@ -353,14 +357,6 @@ if '${WITH_XPU}' == 'OFF' and '${XPU_SDK_ROOT}' != '':
package_data['paddle.libs']+=['libxpurt.so']
### New custom op extension mechanism related ###
# copy paddle_custom_op.lib/paddle_custom_op.dll to libs on Windows
if os.name == 'nt':
shutil.copy('${PADDLE_CUSTOM_OP_IMPORT_LIB}', libs_path)
shutil.copy('${PADDLE_CUSTOM_OP_SHARED_LIB}', libs_path)
package_data['paddle.libs'] += ['paddle_custom_op.lib', 'paddle_custom_op.dll']
# remove unused paddle/libs/__init__.py
if os.path.isfile(libs_path+'/__init__.py'):
os.remove(libs_path+'/__init__.py')
......
......@@ -436,9 +436,172 @@ CPU_PARALLEL_JOB = [
'assign_op_test',
'allocator_facade_frac_flags_test',
'aes_cipher_test',
'test_dist_sparse_tensor_load_adagrad',
'test_dist_mnist_fp16_allreduce',
'test_dist_mnist_gradient_merge',
'test_dist_allreduce_op',
'test_hdfs3',
'test_parallel_dygraph_se_resnext',
'test_dist_fleet_ps9',
'test_dist_fleet_infer',
'test_dist_se_resnext_sync',
'test_dist_oneps',
'test_dist_sparse_load_ps1',
'test_dist_mnist_batch_merge',
'test_dist_fleet_ctr',
'test_dist_fleet_ps10',
'test_parallel_dygraph_transformer',
'test_dist_mnist_fleetapi',
'test_dist_sparse_tensor_load_adam',
'test_dist_fleet_ps4',
'test_dist_fleet_heter_program',
'test_parallel_dygraph_sparse_embedding_over_height',
'test_hdfs2',
'test_dist_sharding_save',
'test_dist_fleet_ps_gpu_ctr',
'test_dist_mnist_backward_deps',
'test_dist_fleet_heter_base',
'test_dist_sparse_tensor_load_sgd',
'test_new_group',
'test_dist_mnist_with_program',
'test_dist_mnist_pg',
'test_dist_sparse_tensor_load_rmsprop',
'test_auto_checkpoint2',
'test_dist_sparse_tensor_load_ftrl',
'test_dist_fleet_ps6',
'test_dist_mnist_fleet_save',
'test_auto_checkpoint1',
'test_dist_fleet_a_sync_optimizer_sync',
'test_dist_fleet_ps3',
'test_dist_se_resnext_nccl',
'test_parallel_dygraph_mnist',
'test_auto_checkpoint_multiple',
'test_dist_fleet_a_sync_optimizer_auto_async',
'test_pipeline',
'test_dist_fleet_ps8',
'test_dist_fleet_sparse_embedding_ctr',
'test_dist_se_resnext_dgc',
'test_dist_fleet_ps7',
'test_dist_fleet_decay',
'test_dist_fleet_a_sync_optimizer_auto_geo',
'test_dist_fleet_geo',
'test_parallel_dygraph_dataparallel',
'test_hdfs1',
'test_dist_mnist_dgc_nccl',
'test_dist_fleet_ctr2',
'test_parallel_dygraph_unused_variables',
'test_dist_mnist_multi_comm',
'test_dist_sparse_tensor_load_momentum',
'test_gen_nccl_id_op',
'test_parallel_dygraph_sparse_embedding',
'test_dist_mnist_ring_allreduce',
'test_fleet_launch_async',
'test_dist_fleet_a_sync_optimizer_geo',
'test_parallel_dygraph_control_flow',
'test_auto_checkpoint',
'test_fleet_pipeline_meta_optimizer',
'test_dist_fleet_heter_ctr',
'test_fleet_graph_execution_meta_optimizer',
'test_fleet_run_random_port',
'test_dist_fleet_ps5',
'test_dist_fleet_a_sync_optimizer_auto',
'test_dist_lookup_sparse_table_fuse_ops',
'test_dist_fleet_a_sync_optimizer_async',
'test_c_comm_init_op',
'test_fleet_launch_nproc',
'test_dist_fleet_simnet',
'test_auto_checkpoint_dist_basic',
'test_fleet_launch_cloud',
'test_dist_fleet_ps',
'test_dist_op',
'test_dist_sparse_load_ps0',
'test_auto_checkpoint3',
'test_dist_fleet_ps2',
'test_dist_fleet_grad_clip',
'test_custom_concat',
'test_analyzer_transformer_fuse',
'test_analyzer_seq_pool1_fuse_statis',
'test_fc_lstm_fuse_pass_cc',
'test_layer_norm_fuse_pass',
'test_fc_gru_fuse_pass_cc',
'test_analyzer_save_model',
'test_fleet_ps',
'test_analyzer_multi_model_prediction',
'test_fleet_base_3',
'test_fleet_base_2',
'test_ascend_trigger',
'test_fleet_amp_meta_optimizer',
'test_fleetrun',
'test_check_abi',
'dense_table_test',
'test_adaptive_pool2d_convert_global_pass',
'test_fleet_recompute_meta_optimizer',
'test_fleet_fp16_allreduce_meta_optimizer',
'test_post_training_quantization_lstm_model',
'test_fleet_metric',
'test_fleet_gradient_merge_meta_optimizer',
'test_fleet_sharding_meta_optimizer',
'test_listen_and_serv_op',
'test_analyzer_zerocopytensor_tensor',
'test_conv_bn_fuse_pass_cc',
'test_collective_optimizer',
'test_bf16_utils',
'test_analyzer_seq_pool1_compare_determine',
'test_avoid_twice_initialization',
'test_callback_early_stop',
'test_fleet_distributed_strategy',
'test_launch_coverage',
'test_sgd_op_bf16',
'test_model_cast_to_bf16',
'test_hybrid_parallel_topology',
'barrier_table_test',
'test_check_error',
'test_fleet_lamb_meta_optimizer',
'test_fleet_rolemaker_2',
'test_distributed_strategy',
'test_rnn_cudnn_params_packing',
'test_communicator_async',
'brpc_utils_test',
'test_analyzer_capi_pd_tensor',
'test_recv_save_op',
'heter_listen_and_server_test',
'test_analyzer_capi_ner',
'test_unsqueeze2_eltwise_fuse_pass',
'test_dgc_optimizer',
'test_fleet_cc',
'test_repeated_fc_relu_fuse_pass_cc',
'heter_server_test',
'test_static_save_load_large',
'graph_node_test',
'test_custom_conj',
'test_fleet_private_function',
'test_fake_init_op',
'brpc_service_sparse_sgd_test',
'test_tf32_cudnn',
'test_communicator_geo',
'test_dispatch_jit',
'test_layer_norm_fuse_pass_cc',
'test_fleet_dgc_meta_optimizer',
'test_fc_fuse_pass_cc',
'test_communicator_sync',
'test_analyzer_capi',
'test_fleet_lars_meta_optimizer',
'test_communicator_half_async',
'test_fleet_localsgd_meta_optimizer',
'test_fleet_amp_init',
'test_fleet_checkpoint',
'test_analyzer_seq_pool1_fuse_compare_zero_copy',
'test_lookup_table_bf16_op',
'test_fleet_meta_optimizer_base',
'table_test',
'test_fleet_rolemaker_new',
'test_fleet_graph_executor',
'test_multi_out_jit',
'test_fleet_utils',
'brpc_service_dense_sgd_test',
]
# It run 4 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED,
# It run 4 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED,
# just remove it from this list.
TETRAD_PARALLEL_JOB = [
'buffered_allocator_test',
......@@ -477,9 +640,53 @@ TETRAD_PARALLEL_JOB = [
'tensor_test',
'test_repeated_fc_relu_fuse_pass_cc',
'test_mkldnn_caching',
'test_analyzer_seq_pool1',
'test_analyzer_ocr',
'test_analyzer_seq_conv1',
'test_analyzer_small_dam',
'test_analyzer_mobilenet_depthwise_conv',
'test_analyzer_pyramid_dnn',
'test_analyzer_text_classification',
'test_analyzer_rnn2',
'test_analyzer_transformer',
'test_analyzer_resnet50',
'test_analyzer_ner',
'test_analyzer_lac',
'test_analyzer_transformer_profile',
'test_analyzer_mobilenet_transpose',
'test_analyzer_rnn1',
'test_analyzer_seq_pool1_profile',
'test_analyzer_paddletensor_tensor',
'test_analyzer_bert',
'test_analyzer_googlenet',
'zero_copy_tensor_test',
'custom_tensor_test',
'test_fleet_base',
'test_imperative_container_layerdict',
'test_complex_simplenet',
'test_tensor_register_hook',
'test_set_value_op',
'test_tensor_type_promotion',
'test_view_op_reuse_allocation',
'test_complex_grad_accumulated',
'test_sequential',
'test_sequential',
'test_imperative_layers',
'test_dgc_momentum_op',
'test_memcpy_op',
'test_dgc_op',
'test_modelaverage',
'test_lookahead',
'test_callback_visualdl',
'test_new_group_api',
'test_collective_split_embedding_none_divisible',
'test_collective_wait',
'test_collective_split_row_linear',
'test_collective_split_col_linear',
'test_collective_split_embedding',
]
# It run 2 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED,
# It run 2 job each time, If it failed due to Insufficient GPU memory or CUBLAS_STATUS_ALLOC_FAILED,
# just remove it from this list.
TWO_PARALLEL_JOB = [
'convert_model2dot_ernie',
......@@ -611,7 +818,6 @@ TWO_PARALLEL_JOB = [
'test_adam_op_multi_thread',
'test_adamax_op',
'test_while_loop_op',
'test_affine_grid_function',
'test_transpose_flatten_concat_fuse_pass',
'test_trace_op',
'test_backward',
......@@ -663,7 +869,6 @@ TWO_PARALLEL_JOB = [
'test_gather_op',
'test_partial_concat_op',
'test_gaussian_random_op',
'test_paddle_imperative_double_grad',
'test_generate_proposals_v2_op',
'test_pad_constant_like',
'test_grid_sample_function',
......@@ -879,6 +1084,11 @@ TWO_PARALLEL_JOB = [
'test_imperative_load_static_param',
'test_fuse_bn_add_act_pass',
'test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass',
'test_quantize_transpiler_v2',
'paddle_infer_api_test',
'test_analyzer_ernie',
'lite_resnet50_test',
'lite_mul_model_test',
]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册