未验证 提交 d815fbf9 编写于 作者: A Aurelius84 提交者: GitHub

[CustomOp]Support MacOS platform and Remove libpaddle_custom_op.so dependency (#31976)

* Remove old custom OP to reduce whl package volume

* [Custom OP]Remove old custom OP to reduce whl package volume

* support macos
上级 55730d95
...@@ -367,29 +367,23 @@ endif() ...@@ -367,29 +367,23 @@ endif()
##### 2.0 New custom op extension mechanism related ##### ##### 2.0 New custom op extension mechanism related #####
# if not deps `layer`, will cause: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_ # if not deps `layer`, will cause: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer) if (WIN32)
set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)
set(PADDLE_CUSTOM_OP_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/custom_operator.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_tensor.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_op_meta_info.cc
${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc)
set(PADDLE_CUSTOM_OP_SRCS ${PADDLE_CUSTOM_OP_SRCS} PARENT_SCOPE)
cc_library(paddle_custom_op_shared set(PADDLE_CUSTOM_OP_SRCS
SHARED SRCS ${PADDLE_CUSTOM_OP_SRCS} DEPS ${PADDLE_CUSTOM_OP_MODULES}) ${CMAKE_CURRENT_SOURCE_DIR}/custom_operator.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_tensor.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_op_meta_info.cc
${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc)
set(PADDLE_CUSTOM_OP_SRCS ${PADDLE_CUSTOM_OP_SRCS} PARENT_SCOPE)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) cc_library(paddle_custom_op_shared
set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op) SHARED SRCS ${PADDLE_CUSTOM_OP_SRCS} DEPS ${PADDLE_CUSTOM_OP_MODULES})
target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})
if (LINUX) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
set(PADDLE_CUSTOM_OP_SHARED_LIB set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op)
${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_custom_op.so target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})
CACHE INTERNAL "Paddle custom op lib")
endif()
if (WIN32)
if("${CMAKE_GENERATOR}" STREQUAL "Ninja") if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
set(paddle_custom_op_lib_path ${CMAKE_CURRENT_BINARY_DIR}) set(paddle_custom_op_lib_path ${CMAKE_CURRENT_BINARY_DIR})
else() else()
...@@ -402,9 +396,3 @@ if (WIN32) ...@@ -402,9 +396,3 @@ if (WIN32)
${paddle_custom_op_lib_path}/paddle_custom_op.dll ${paddle_custom_op_lib_path}/paddle_custom_op.dll
CACHE INTERNAL "Paddle custom op dll") CACHE INTERNAL "Paddle custom op dll")
endif() endif()
if(APPLE)
set(PADDLE_CUSTOM_OP_SHARED_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/paddle_custom_op.dylib
CACHE INTERNAL "Paddle custom op lib")
endif()
...@@ -414,12 +414,7 @@ void* GetMKLMLDsoHandle() { ...@@ -414,12 +414,7 @@ void* GetMKLMLDsoHandle() {
} }
void* GetOpDsoHandle(const std::string& dso_name) { void* GetOpDsoHandle(const std::string& dso_name) {
#if defined(__APPLE__) || defined(__OSX__)
PADDLE_THROW(platform::errors::Unimplemented(
"Create custom cpp op outside framework do not support Apple."));
#else
return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name); return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name);
#endif
} }
void* GetNvtxDsoHandle() { void* GetNvtxDsoHandle() {
......
...@@ -8,11 +8,6 @@ endforeach() ...@@ -8,11 +8,6 @@ endforeach()
add_subdirectory(unittests) add_subdirectory(unittests)
add_subdirectory(book) add_subdirectory(book)
add_subdirectory(custom_op)
# 2.0 New custom OP can support Windows/Linux now
# TODO: support 2.0 New Custom OP on Mac
if(NOT APPLE)
add_subdirectory(custom_op)
endif()
set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120) set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120)
# New custom OP can support Windows/Linux now # New custom OP can support Windows/Linux now
if(WITH_GPU) if(WITH_GPU OR APPLE)
# GPU custom op tests: compile both .cc and .cu file # GPU custom op tests: compile both .cc and .cu file
py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py) py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py) py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
......
...@@ -14,17 +14,21 @@ ...@@ -14,17 +14,21 @@
import os import os
from utils import paddle_includes, extra_compile_args from utils import paddle_includes, extra_compile_args, IS_MAC
from paddle.utils.cpp_extension import CUDAExtension, setup from paddle.utils.cpp_extension import CUDAExtension, setup, CppExtension
# Mac-CI don't support GPU
Extension = CppExtension if IS_MAC else CUDAExtension
sources = ['custom_relu_op.cc', 'custom_relu_op_dup.cc']
if not IS_MAC:
sources.append('custom_relu_op.cu')
# custom_relu_op_dup.cc is only used for multi ops test, # custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this # not a new op, if you want to test only one op, remove this
# source file # source file
setup( setup(
name='custom_relu_module_setup', name='custom_relu_module_setup',
ext_modules=CUDAExtension( # test for not specific name here. ext_modules=Extension( # test for not specific name here.
sources=[ sources=sources, # test for multi ops
'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
], # test for multi ops
include_dirs=paddle_includes, include_dirs=paddle_includes,
extra_compile_args=extra_compile_args)) extra_compile_args=extra_compile_args))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -52,6 +52,8 @@ class TestCheckCompiler(TestABIBase): ...@@ -52,6 +52,8 @@ class TestCheckCompiler(TestABIBase):
compiler = 'g++' compiler = 'g++'
elif utils.IS_WINDOWS: elif utils.IS_WINDOWS:
compiler = 'cl' compiler = 'cl'
else:
compiler = 'clang'
# Linux: all CI gcc version > 5.4.0 # Linux: all CI gcc version > 5.4.0
# Windows: all CI MSVC version > 19.00.24215 # Windows: all CI MSVC version > 19.00.24215
...@@ -71,7 +73,7 @@ class TestCheckCompiler(TestABIBase): ...@@ -71,7 +73,7 @@ class TestCheckCompiler(TestABIBase):
self.assertTrue( self.assertTrue(
"Compiler Compatibility WARNING" in str(error[0].message)) "Compiler Compatibility WARNING" in str(error[0].message))
def test_exception(self): def test_exception_linux(self):
# clear environ # clear environ
self.del_environ() self.del_environ()
compiler = 'python' # fake command compiler = 'python' # fake command
...@@ -95,6 +97,28 @@ class TestCheckCompiler(TestABIBase): ...@@ -95,6 +97,28 @@ class TestCheckCompiler(TestABIBase):
# restore # restore
utils._expected_compiler_current_platform = raw_func utils._expected_compiler_current_platform = raw_func
def test_exception_mac(self):
# clear environ
self.del_environ()
compiler = 'python' # fake command
if utils.OS_NAME.startswith('darwin'):
def fake():
return [compiler]
# mock a fake function
raw_func = utils._expected_compiler_current_platform
utils._expected_compiler_current_platform = fake
with warnings.catch_warnings(record=True) as error:
flag = utils.check_abi_compatibility(compiler, verbose=True)
# check return True
self.assertTrue(flag)
# check ABI Compatibility without WARNING
self.assertTrue(len(error) == 0)
# restore
utils._expected_compiler_current_platform = raw_func
class TestRunCMDException(unittest.TestCase): class TestRunCMDException(unittest.TestCase):
def test_exception(self): def test_exception(self):
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -21,9 +21,9 @@ from paddle import nn ...@@ -21,9 +21,9 @@ from paddle import nn
from paddle.utils.cpp_extension import load, get_build_directory from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_MAC
# Because Windows don't use docker, the shared lib already exists in the # Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed. # cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_for_model_jit\\custom_relu_for_model_jit.pyd'.format( file = '{}\\custom_relu_for_model_jit\\custom_relu_for_model_jit.pyd'.format(
get_build_directory()) get_build_directory())
...@@ -35,9 +35,13 @@ if os.name == 'nt' and os.path.isfile(file): ...@@ -35,9 +35,13 @@ if os.name == 'nt' and os.path.isfile(file):
# custom_relu_op_dup.cc is only used for multi ops test, # custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this # not a new op, if you want to test only one op, remove this
# source file # source file
source_files = ['custom_relu_op.cc']
if not IS_MAC:
source_files.append('custom_relu_op.cu')
custom_module = load( custom_module = load(
name='custom_relu_for_model_jit', name='custom_relu_for_model_jit',
sources=['custom_relu_op.cc', 'custom_relu_op.cu'], sources=source_files,
extra_include_paths=paddle_includes, # add for Coverage CI extra_include_paths=paddle_includes, # add for Coverage CI
extra_cxx_cflags=extra_cc_args, # test for cc flags extra_cxx_cflags=extra_cc_args, # test for cc flags
extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags
...@@ -84,7 +88,7 @@ class TestDygraphModel(unittest.TestCase): ...@@ -84,7 +88,7 @@ class TestDygraphModel(unittest.TestCase):
for i in range(self.batch_num) for i in range(self.batch_num)
] ]
self.devices = ['cpu', 'gpu'] self.devices = ['cpu', 'gpu'] if not IS_MAC else ['cpu']
# for saving model # for saving model
self.model_path_template = "infer_model/custom_relu_dygaph_model_{}.pdparams" self.model_path_template = "infer_model/custom_relu_dygaph_model_{}.pdparams"
...@@ -191,7 +195,7 @@ class TestStaticModel(unittest.TestCase): ...@@ -191,7 +195,7 @@ class TestStaticModel(unittest.TestCase):
for i in range(self.batch_num) for i in range(self.batch_num)
] ]
self.devices = ['cpu', 'gpu'] self.devices = ['cpu', 'gpu'] if not IS_MAC else ['cpu']
# for saving model # for saving model
self.model_path_template = "infer_model/custom_relu_static_model_{}_{}" self.model_path_template = "infer_model/custom_relu_static_model_{}_{}"
......
...@@ -18,10 +18,10 @@ import paddle ...@@ -18,10 +18,10 @@ import paddle
import numpy as np import numpy as np
from paddle.utils.cpp_extension import load, get_build_directory from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_WINDOWS from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_WINDOWS, IS_MAC
from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static
# Because Windows don't use docker, the shared lib already exists in the # Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed. # cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format( file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format(
get_build_directory()) get_build_directory())
...@@ -33,11 +33,13 @@ if os.name == 'nt' and os.path.isfile(file): ...@@ -33,11 +33,13 @@ if os.name == 'nt' and os.path.isfile(file):
# custom_relu_op_dup.cc is only used for multi ops test, # custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this # not a new op, if you want to test only one op, remove this
# source file # source file
sources = ['custom_relu_op.cc', 'custom_relu_op_dup.cc']
if not IS_MAC:
sources.append('custom_relu_op.cu')
custom_module = load( custom_module = load(
name='custom_relu_module_jit', name='custom_relu_module_jit',
sources=[ sources=sources,
'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
],
extra_include_paths=paddle_includes, # add for Coverage CI extra_include_paths=paddle_includes, # add for Coverage CI
extra_cxx_cflags=extra_cc_args, # test for cc flags extra_cxx_cflags=extra_cc_args, # test for cc flags
extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags
...@@ -112,6 +114,9 @@ class TestJITLoad(unittest.TestCase): ...@@ -112,6 +114,9 @@ class TestJITLoad(unittest.TestCase):
self.assertTrue(caught_exception) self.assertTrue(caught_exception)
caught_exception = False caught_exception = False
# MAC-CI don't support GPU
if IS_MAC:
return
try: try:
x = np.random.uniform(-1, 1, [4, 8]).astype('int32') x = np.random.uniform(-1, 1, [4, 8]).astype('int32')
custom_relu_dynamic(custom_module.custom_relu, 'gpu', 'int32', x) custom_relu_dynamic(custom_module.custom_relu, 'gpu', 'int32', x)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......
...@@ -13,10 +13,13 @@ ...@@ -13,10 +13,13 @@
# limitations under the License. # limitations under the License.
import os import os
import sys
import six import six
from distutils.sysconfig import get_python_lib from distutils.sysconfig import get_python_lib
from paddle.utils.cpp_extension.extension_utils import IS_WINDOWS from paddle.utils.cpp_extension.extension_utils import IS_WINDOWS
IS_MAC = sys.platform.startswith('darwin')
site_packages_path = get_python_lib() site_packages_path = get_python_lib()
# Note(Aurelius84): We use `add_test` in Cmake to config how to run unittest in CI. # Note(Aurelius84): We use `add_test` in Cmake to config how to run unittest in CI.
# `PYTHONPATH` will be set as `build/python/paddle` that will make no way to find # `PYTHONPATH` will be set as `build/python/paddle` that will make no way to find
......
...@@ -22,14 +22,15 @@ from setuptools.command.easy_install import easy_install ...@@ -22,14 +22,15 @@ from setuptools.command.easy_install import easy_install
from setuptools.command.build_ext import build_ext from setuptools.command.build_ext import build_ext
from distutils.command.build import build from distutils.command.build import build
from .extension_utils import find_cuda_home, find_rocm_home, normalize_extension_kwargs, add_compile_flag from .extension_utils import find_cuda_home, find_rocm_home, normalize_extension_kwargs, add_compile_flag, run_cmd
from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags
from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile
from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from
from .extension_utils import clean_object_if_change_cflags from .extension_utils import clean_object_if_change_cflags, _reset_so_rpath
from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat
from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS, MSVC_COMPILE_FLAGS from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS, MSVC_COMPILE_FLAGS
from .extension_utils import CLANG_COMPILE_FLAGS, CLANG_LINK_FLAGS
from ...fluid import core from ...fluid import core
...@@ -50,14 +51,14 @@ else: ...@@ -50,14 +51,14 @@ else:
def setup(**attr): def setup(**attr):
""" """
The interface is used to config the process of compiling customized operators, The interface is used to config the process of compiling customized operators,
mainly includes how to complile shared library, automatically generate python API mainly includes how to compile shared library, automatically generate python API
and install it into site-package. It supports using customized operators directly with and install it into site-package. It supports using customized operators directly with
``import`` statement. ``import`` statement.
It encapsulates the python built-in ``setuptools.setup`` function and keeps arguments It encapsulates the python built-in ``setuptools.setup`` function and keeps arguments
and usage same as the native interface. Meanwhile, it hiddens Paddle inner framework and usage same as the native interface. Meanwhile, it hiddens Paddle inner framework
concepts, such as necessary compiling flags, included paths of head files, and linking concepts, such as necessary compiling flags, included paths of head files, and linking
flags. It also will automatically search and valid local enviromment and versions of flags. It also will automatically search and valid local environment and versions of
``cc(Linux)`` , ``cl.exe(Windows)`` and ``nvcc`` , then compiles customized operators ``cc(Linux)`` , ``cl.exe(Windows)`` and ``nvcc`` , then compiles customized operators
supporting CPU or GPU device according to the specified Extension type. supporting CPU or GPU device according to the specified Extension type.
...@@ -67,7 +68,7 @@ def setup(**attr): ...@@ -67,7 +68,7 @@ def setup(**attr):
For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2, For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2,
then the version of user's local machine should satisfy GCC >= 8.2. then the version of user's local machine should satisfy GCC >= 8.2.
For Windows, Visual Studio version will be checked, and it shoule be greater than or equal to that of For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of
PaddlePaddle (Visual Studio 2015 update3). PaddlePaddle (Visual Studio 2015 update3).
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility. occur because of ABI compatibility.
...@@ -130,7 +131,7 @@ def setup(**attr): ...@@ -130,7 +131,7 @@ def setup(**attr):
ext_modules(Extension): Specify the Extension instance including customized operator source files, compiling flags et.al. ext_modules(Extension): Specify the Extension instance including customized operator source files, compiling flags et.al.
If only compile operator supporting CPU device, please use ``CppExtension`` ; If compile operator If only compile operator supporting CPU device, please use ``CppExtension`` ; If compile operator
supporting CPU and GPU devices, please use ``CUDAExtension`` . supporting CPU and GPU devices, please use ``CUDAExtension`` .
include_dirs(list[str], optional): Specify the extra include directoies to search head files. The interface will automatically add include_dirs(list[str], optional): Specify the extra include directories to search head files. The interface will automatically add
``site-package/paddle/include`` . Please add the corresponding directory path if including third-party ``site-package/paddle/include`` . Please add the corresponding directory path if including third-party
head files. Default is None. head files. Default is None.
extra_compile_args(list[str] | dict, optional): Specify the extra compiling flags such as ``-O3`` . If set ``list[str]`` , all these flags extra_compile_args(list[str] | dict, optional): Specify the extra compiling flags such as ``-O3`` . If set ``list[str]`` , all these flags
...@@ -158,7 +159,7 @@ def setup(**attr): ...@@ -158,7 +159,7 @@ def setup(**attr):
setup(name='custom_module', setup(name='custom_module',
ext_modules=CUDAExtension( ext_modules=CUDAExtension(
sources=['relu_op.cc', 'relu_op.cu']) sources=['relu_op.cc', 'relu_op.cu'])
# After running `python setup.py install` # After running `python setup.py install`
from custom_module import relu from custom_module import relu
""" """
...@@ -209,7 +210,7 @@ def CppExtension(sources, *args, **kwargs): ...@@ -209,7 +210,7 @@ def CppExtension(sources, *args, **kwargs):
Op Kernel only supporting CPU device. Please use ``CUDAExtension`` if you want to Op Kernel only supporting CPU device. Please use ``CUDAExtension`` if you want to
compile Op Kernel that supports both CPU and GPU devices. compile Op Kernel that supports both CPU and GPU devices.
It furtherly encapsulates python built-in ``setuptools.Extension`` .The arguments and It further encapsulates python built-in ``setuptools.Extension`` .The arguments and
usage are same as the native interface, except for no need to explicitly specify usage are same as the native interface, except for no need to explicitly specify
``name`` . ``name`` .
...@@ -259,7 +260,7 @@ def CUDAExtension(sources, *args, **kwargs): ...@@ -259,7 +260,7 @@ def CUDAExtension(sources, *args, **kwargs):
Op Kernel supporting both CPU and GPU devices. Please use ``CppExtension`` if you want to Op Kernel supporting both CPU and GPU devices. Please use ``CppExtension`` if you want to
compile Op Kernel that supports only CPU device. compile Op Kernel that supports only CPU device.
It furtherly encapsulates python built-in ``setuptools.Extension`` .The arguments and It further encapsulates python built-in ``setuptools.Extension`` .The arguments and
usage are same as the native interface, except for no need to explicitly specify usage are same as the native interface, except for no need to explicitly specify
``name`` . ``name`` .
...@@ -367,11 +368,14 @@ class BuildExtension(build_ext, object): ...@@ -367,11 +368,14 @@ class BuildExtension(build_ext, object):
self.build_lib = self.output_dir self.build_lib = self.output_dir
def build_extensions(self): def build_extensions(self):
if OS_NAME.startswith("darwin"):
self._valid_clang_compiler()
self._check_abi() self._check_abi()
# Note(Aurelius84): If already compiling source before, we should check whether # Note(Aurelius84): If already compiling source before, we should check whether
# cflags have changed and delete the built shared library to re-compile the source # cflags have changed and delete the built shared library to re-compile the source
# even though source file content keep unchanaged. # even though source file content keep unchanged.
so_name = self.get_ext_fullpath(self.extensions[0].name) so_name = self.get_ext_fullpath(self.extensions[0].name)
clean_object_if_change_cflags( clean_object_if_change_cflags(
os.path.abspath(so_name), self.extensions[0]) os.path.abspath(so_name), self.extensions[0])
...@@ -397,17 +401,21 @@ class BuildExtension(build_ext, object): ...@@ -397,17 +401,21 @@ class BuildExtension(build_ext, object):
cflags = copy.deepcopy(extra_postargs) cflags = copy.deepcopy(extra_postargs)
try: try:
original_compiler = self.compiler.compiler_so original_compiler = self.compiler.compiler_so
# ncvv compile CUDA source # nvcc compile CUDA source
if is_cuda_file(src): if is_cuda_file(src):
if core.is_compiled_with_rocm(): if core.is_compiled_with_rocm():
assert ROCM_HOME is not None, "Not found ROCM runtime, please use `export ROCM_PATH= XXX` to specific it." assert ROCM_HOME is not None, "Not found ROCM runtime, \
please use `export ROCM_PATH= XXX` to specify it."
hipcc_cmd = os.path.join(ROCM_HOME, 'bin', 'hipcc') hipcc_cmd = os.path.join(ROCM_HOME, 'bin', 'hipcc')
self.compiler.set_executable('compiler_so', hipcc_cmd) self.compiler.set_executable('compiler_so', hipcc_cmd)
# {'nvcc': {}, 'cxx: {}} # {'nvcc': {}, 'cxx: {}}
if isinstance(cflags, dict): if isinstance(cflags, dict):
cflags = cflags['hipcc'] cflags = cflags['hipcc']
else: else:
assert CUDA_HOME is not None, "Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it." assert CUDA_HOME is not None, "Not found CUDA runtime, \
please use `export CUDA_HOME= XXX` to specify it."
nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc') nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc')
self.compiler.set_executable('compiler_so', nvcc_cmd) self.compiler.set_executable('compiler_so', nvcc_cmd)
# {'nvcc': {}, 'cxx: {}} # {'nvcc': {}, 'cxx: {}}
...@@ -424,7 +432,7 @@ class BuildExtension(build_ext, object): ...@@ -424,7 +432,7 @@ class BuildExtension(build_ext, object):
original_compile(obj, src, ext, cc_args, cflags, pp_opts) original_compile(obj, src, ext, cc_args, cflags, pp_opts)
finally: finally:
# restore original_compiler # restore original_compiler
self.compiler.compiler_so = original_compiler self.compiler.set_executable('compiler_so', original_compiler)
def win_custom_single_compiler(sources, def win_custom_single_compiler(sources,
output_dir=None, output_dir=None,
...@@ -470,7 +478,9 @@ class BuildExtension(build_ext, object): ...@@ -470,7 +478,9 @@ class BuildExtension(build_ext, object):
src = src_list[0] src = src_list[0]
obj = obj_list[0] obj = obj_list[0]
if is_cuda_file(src): if is_cuda_file(src):
assert CUDA_HOME is not None, "Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it." assert CUDA_HOME is not None, "Not found CUDA runtime, \
please use `export CUDA_HOME= XXX` to specify it."
nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc') nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc')
if isinstance(self.cflags, dict): if isinstance(self.cflags, dict):
cflags = self.cflags['nvcc'] cflags = self.cflags['nvcc']
...@@ -548,22 +558,42 @@ class BuildExtension(build_ext, object): ...@@ -548,22 +558,42 @@ class BuildExtension(build_ext, object):
print("Compiling user custom op, it will cost a few seconds.....") print("Compiling user custom op, it will cost a few seconds.....")
build_ext.build_extensions(self) build_ext.build_extensions(self)
# Reset runtime library path on MacOS platform
so_path = self.get_ext_fullpath(self.extensions[0]._full_name)
_reset_so_rpath(so_path)
def get_ext_filename(self, fullname): def get_ext_filename(self, fullname):
# for example: custommed_extension.cpython-37m-x86_64-linux-gnu.so # for example: custommed_extension.cpython-37m-x86_64-linux-gnu.so
ext_name = super(BuildExtension, self).get_ext_filename(fullname) ext_name = super(BuildExtension, self).get_ext_filename(fullname)
split_str = '.'
name_items = ext_name.split(split_str)
if self.no_python_abi_suffix and six.PY3: if self.no_python_abi_suffix and six.PY3:
split_str = '.'
name_items = ext_name.split(split_str)
assert len( assert len(
name_items name_items
) > 2, "Expected len(name_items) > 2, but received {}".format( ) > 2, "Expected len(name_items) > 2, but received {}".format(
len(name_items)) len(name_items))
name_items.pop(-2) name_items.pop(-2)
# custommed_extension.so
ext_name = split_str.join(name_items) ext_name = split_str.join(name_items)
# custommed_extension.dylib
if OS_NAME.startswith('darwin'):
name_items[-1] = 'dylib'
ext_name = split_str.join(name_items)
return ext_name return ext_name
def _valid_clang_compiler(self):
"""
Make sure to use Clang as compiler on Mac platform
"""
compiler_infos = ['clang'] + CLANG_COMPILE_FLAGS
linker_infos = ['clang'] + CLANG_LINK_FLAGS
self.compiler.set_executables(
compiler=compiler_infos,
compiler_so=compiler_infos,
compiler_cxx=['clang'],
linker_exe=['clang'],
linker_so=linker_infos)
def _check_abi(self): def _check_abi(self):
""" """
Check ABI Compatibility. Check ABI Compatibility.
...@@ -628,6 +658,8 @@ class EasyInstallCommand(easy_install, object): ...@@ -628,6 +658,8 @@ class EasyInstallCommand(easy_install, object):
will_rename = False will_rename = False
if OS_NAME.startswith('linux') and ext == '.so': if OS_NAME.startswith('linux') and ext == '.so':
will_rename = True will_rename = True
elif OS_NAME.startswith('darwin') and ext == '.dylib':
will_rename = True
elif IS_WINDOWS and ext == '.pyd': elif IS_WINDOWS and ext == '.pyd':
will_rename = True will_rename = True
...@@ -702,7 +734,7 @@ def load(name, ...@@ -702,7 +734,7 @@ def load(name,
For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2, For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2,
then the version of user's local machine should satisfy GCC >= 8.2. then the version of user's local machine should satisfy GCC >= 8.2.
For Windows, Visual Studio version will be checked, and it shoule be greater than or equal to that of For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of
PaddlePaddle (Visual Studio 2015 update3). PaddlePaddle (Visual Studio 2015 update3).
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility. occur because of ABI compatibility.
...@@ -729,7 +761,7 @@ def load(name, ...@@ -729,7 +761,7 @@ def load(name,
custom_op_module = load( custom_op_module = load(
name="op_shared_libary_name", # name of shared library name="op_shared_libary_name", # name of shared library
sources=['relu_op.cc', 'relu_op.cu'], # source files of cusomized op sources=['relu_op.cc', 'relu_op.cu'], # source files of customized op
extra_cxx_cflags=['-g', '-w'], # optional, specify extra flags to compile .cc/.cpp file extra_cxx_cflags=['-g', '-w'], # optional, specify extra flags to compile .cc/.cpp file
extra_cuda_cflags=['-O2'], # optional, specify extra flags to compile .cu file extra_cuda_cflags=['-O2'], # optional, specify extra flags to compile .cu file
verbose=True # optional, specify to output log information verbose=True # optional, specify to output log information
...@@ -761,7 +793,7 @@ def load(name, ...@@ -761,7 +793,7 @@ def load(name,
verbose(bool, optional): whether to verbose compiled log information. Default is False verbose(bool, optional): whether to verbose compiled log information. Default is False
Returns: Returns:
Moudle: A callable python module contains all CustomOp Layer APIs. Module: A callable python module contains all CustomOp Layer APIs.
""" """
......
...@@ -44,6 +44,13 @@ MSVC_COMPILE_FLAGS = [ ...@@ -44,6 +44,13 @@ MSVC_COMPILE_FLAGS = [
'/wd4190', '/EHsc', '/w', '/DGOOGLE_GLOG_DLL_DECL', '/wd4190', '/EHsc', '/w', '/DGOOGLE_GLOG_DLL_DECL',
'/DBOOST_HAS_STATIC_ASSERT', '/DNDEBUG', '/DPADDLE_USE_DSO' '/DBOOST_HAS_STATIC_ASSERT', '/DNDEBUG', '/DPADDLE_USE_DSO'
] ]
# Compile flags handed to Apple Clang when building custom ops on macOS
# (mirrors CPython's own x86_64 extension-build flags).
CLANG_COMPILE_FLAGS = [
    '-fno-common', '-dynamic', '-DNDEBUG', '-g', '-fwrapv', '-O3', '-arch',
    'x86_64'
]
# Link flags for producing a .dylib on macOS; `-undefined dynamic_lookup`
# defers unresolved-symbol resolution until the library is loaded.
CLANG_LINK_FLAGS = [
    '-dynamiclib', '-undefined', 'dynamic_lookup', '-arch', 'x86_64'
]
MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib'] MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib']
...@@ -247,7 +254,7 @@ class VersionManager: ...@@ -247,7 +254,7 @@ class VersionManager:
def combine_hash(md5, value): def combine_hash(md5, value):
""" """
Return new hash value. Return new hash value.
DO NOT use `hash()` beacuse it doesn't generate stable value between different process. DO NOT use `hash()` because it doesn't generate stable value between different process.
See https://stackoverflow.com/questions/27522626/hash-function-in-python-3-3-returns-different-results-between-sessions See https://stackoverflow.com/questions/27522626/hash-function-in-python-3-3-returns-different-results-between-sessions
""" """
md5.update(repr(value).encode()) md5.update(repr(value).encode())
...@@ -286,13 +293,13 @@ def clean_object_if_change_cflags(so_path, extension): ...@@ -286,13 +293,13 @@ def clean_object_if_change_cflags(so_path, extension):
if os.path.exists(so_path) and os.path.exists(version_file): if os.path.exists(so_path) and os.path.exists(version_file):
old_version_info = deserialize(version_file) old_version_info = deserialize(version_file)
so_version = old_version_info.get(so_name, None) so_version = old_version_info.get(so_name, None)
# delete shared library file if versison is changed to re-compile it. # delete shared library file if version is changed to re-compile it.
if so_version is not None and so_version != versioner.version: if so_version is not None and so_version != versioner.version:
log_v( log_v(
"Re-Compiling {}, because specified cflags have been changed. New signature {} has been saved into {}.". "Re-Compiling {}, because specified cflags have been changed. New signature {} has been saved into {}.".
format(so_name, versioner.version, version_file)) format(so_name, versioner.version, version_file))
os.remove(so_path) os.remove(so_path)
# upate new version information # update new version information
new_version_info = versioner.details new_version_info = versioner.details
new_version_info[so_name] = versioner.version new_version_info[so_name] = versioner.version
serialize(version_file, new_version_info) serialize(version_file, new_version_info)
...@@ -348,6 +355,54 @@ def get_cuda_arch_flags(cflags): ...@@ -348,6 +355,54 @@ def get_cuda_arch_flags(cflags):
return [] return []
def _get_fluid_path():
    """
    Return the absolute path of the installed `paddle/fluid` directory.
    """
    import paddle
    paddle_dir = os.path.dirname(paddle.__file__)
    return os.path.join(paddle_dir, 'fluid')
def _get_core_name():
    """
    Return the pybind DSO module filename: 'core_noavx.so' when the
    no-AVX core was loaded, otherwise 'core_avx.so'.
    """
    import paddle
    noavx_loaded = paddle.fluid.core.load_noavx
    return 'core_noavx.so' if noavx_loaded else 'core_avx.so'
def _get_lib_core_path():
    """
    Return real path of libcore_(no)avx.dylib on MacOS.
    """
    # e.g. 'core_avx.so' -> 'libcore_avx.dylib' (strip '.so', add lib/dylib)
    core_so_name = _get_core_name()
    dylib_name = "lib{}.dylib".format(core_so_name[:-3])
    return os.path.join(_get_fluid_path(), dylib_name)
def _reset_so_rpath(so_path):
    """
    Rewrite the runtime load path recorded in a freshly-built custom-op dylib.

    NOTE(Aurelius84): The runtime path of core_(no)avx.so was set to
    `@loader_path/../libs` in setup.py.in. When loading a custom op,
    `@loader_path` resolves to the custom op's own directory rather than
    `paddle/fluid`, so we rewrite that entry to `@rpath/<core name>` so the
    dynamic loader can locate it via the rpath added at link time
    (`-rpath site-packages/paddle/fluid`), removing any need to set
    `LD_LIBRARY_PATH`.
    """
    assert os.path.exists(so_path)
    # install_name_tool only exists (and is only needed) on macOS.
    if OS_NAME.startswith("darwin"):
        old_entry = "@loader_path/../libs/"
        new_entry = "@rpath/{}".format(_get_core_name())
        run_cmd('install_name_tool -change {} {} {}'.format(
            old_entry, new_entry, so_path))
def normalize_extension_kwargs(kwargs, use_cuda=False): def normalize_extension_kwargs(kwargs, use_cuda=False):
""" """
Normalize include_dirs, library_dir and other attributes in kwargs. Normalize include_dirs, library_dir and other attributes in kwargs.
...@@ -381,15 +436,28 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): ...@@ -381,15 +436,28 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib']) extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
kwargs['extra_link_args'] = extra_link_args kwargs['extra_link_args'] = extra_link_args
else: else:
########################### Linux Platform ###########################
extra_link_args = kwargs.get('extra_link_args', [])
# On Linux, GCC support '-l:xxx.so' to specify the library name
# without `lib` prefix.
if OS_NAME.startswith('linux'):
extra_link_args.append('-l:{}'.format(_get_core_name()))
########################### MacOS Platform ###########################
else:
# See _reset_so_rpath for details.
extra_link_args.append('-Wl,-rpath,{}'.format(_get_fluid_path()))
# On MacOS, ld don't support `-l:xx`, so we create a
# libcore_avx.dylib symbol link.
lib_core_name = create_sym_link_if_not_exist()
extra_link_args.append('-l{}'.format(lib_core_name))
########################### -- END -- ###########################
add_compile_flag(extra_compile_args, ['-w']) # disable warning add_compile_flag(extra_compile_args, ['-w']) # disable warning
# Note(Aurelius84): This marco will impact memory layout of `Tensor`. # Note(Aurelius84): This marco will impact memory layout of `Tensor`.
# We align it automatially with pre-installed Paddle. # We align it automatically with pre-installed Paddle.
if core.is_compiled_with_mkldnn(): if core.is_compiled_with_mkldnn():
add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_MKLDNN']) add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_MKLDNN'])
# append link flags
extra_link_args = kwargs.get('extra_link_args', [])
extra_link_args.append('-lpaddle_custom_op')
if use_cuda: if use_cuda:
extra_link_args.append('-lcudart') extra_link_args.append('-lcudart')
...@@ -406,6 +474,30 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): ...@@ -406,6 +474,30 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
return kwargs return kwargs
def create_sym_link_if_not_exist():
    """
    Create soft symbol link of `core_avx.so` or `core_noavx.so`.

    macOS `ld` does not support the GNU `-l:core_avx.so` form, so a
    `libcore_(no)avx.dylib` symlink is created next to the real .so to make
    `-lcore_(no)avx` style linking work.

    Returns the core library name without its '.so' suffix, suitable for a
    `-l<name>` linker argument.
    """
    assert OS_NAME.startswith('darwin')

    core_so_name = _get_core_name()
    src_path = os.path.join(_get_fluid_path(), core_so_name)
    dst_path = _get_lib_core_path()

    if not os.path.exists(dst_path):
        try:
            os.symlink(src_path, dst_path)
            assert os.path.exists(dst_path)
        except Exception:
            raise RuntimeError(
                "Failed to create soft symbol link for {}.\n Please execute the following command manually: `ln -s {} {}`".
                format(core_so_name, src_path, dst_path))

    # core_avx or core_noavx without suffix
    return core_so_name[:-3]
def find_cuda_home(): def find_cuda_home():
""" """
Use heuristic method to find cuda path Use heuristic method to find cuda path
...@@ -518,6 +610,11 @@ def find_paddle_includes(use_cuda=False): ...@@ -518,6 +610,11 @@ def find_paddle_includes(use_cuda=False):
cuda_include_dir = find_cuda_includes() cuda_include_dir = find_cuda_includes()
include_dirs.extend(cuda_include_dir) include_dirs.extend(cuda_include_dir)
if OS_NAME.startswith('darwin'):
# NOTE(Aurelius84): Ensure to find std v1 headers correctly.
std_v1_includes = '/Library/Developer/CommandLineTools/usr/include/c++/v1/'
include_dirs.append(std_v1_includes)
return include_dirs return include_dirs
...@@ -567,6 +664,9 @@ def find_paddle_libraries(use_cuda=False): ...@@ -567,6 +664,9 @@ def find_paddle_libraries(use_cuda=False):
cuda_lib_dir = find_cuda_libraries() cuda_lib_dir = find_cuda_libraries()
paddle_lib_dirs.extend(cuda_lib_dir) paddle_lib_dirs.extend(cuda_lib_dir)
# add `paddle/fluid` to search `core_avx.so` or `core_noavx.so`
paddle_lib_dirs.append(_get_fluid_path())
return paddle_lib_dirs return paddle_lib_dirs
...@@ -614,9 +714,6 @@ def get_build_directory(verbose=False): ...@@ -614,9 +714,6 @@ def get_build_directory(verbose=False):
if IS_WINDOWS: if IS_WINDOWS:
root_extensions_directory = os.path.normpath( root_extensions_directory = os.path.normpath(
root_extensions_directory) root_extensions_directory)
elif OS_NAME.startswith('darwin'):
# TODO(Aurelius84): consider macOs
raise NotImplementedError("Not support Mac now.")
log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.". log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.".
format(root_extensions_directory), verbose) format(root_extensions_directory), verbose)
...@@ -654,6 +751,8 @@ def _import_module_from_library(module_name, build_directory, verbose=False): ...@@ -654,6 +751,8 @@ def _import_module_from_library(module_name, build_directory, verbose=False):
""" """
if IS_WINDOWS: if IS_WINDOWS:
dynamic_suffix = '.pyd' dynamic_suffix = '.pyd'
elif OS_NAME.startswith('darwin'):
dynamic_suffix = '.dylib'
else: else:
dynamic_suffix = '.so' dynamic_suffix = '.so'
ext_path = os.path.join(build_directory, module_name + dynamic_suffix) ext_path = os.path.join(build_directory, module_name + dynamic_suffix)
...@@ -708,7 +807,7 @@ def _custom_api_content(op_name): ...@@ -708,7 +807,7 @@ def _custom_api_content(op_name):
# Set 'float32' temporarily, and the actual dtype of output variable will be inferred # Set 'float32' temporarily, and the actual dtype of output variable will be inferred
# in runtime. # in runtime.
outs[out_name] = helper.create_variable(dtype='float32') outs[out_name] = helper.create_variable(dtype='float32')
helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs) helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs)
res = [outs[out_name] for out_name in out_names] res = [outs[out_name] for out_name in out_names]
...@@ -757,7 +856,7 @@ def _get_api_inputs_str(op_name): ...@@ -757,7 +856,7 @@ def _get_api_inputs_str(op_name):
# e.g: x, y, z # e.g: x, y, z
param_names = in_names + attr_names param_names = in_names + attr_names
# NOTE(chenweihang): we add suffix `@VECTOR` for std::vector<Tensor> input, # NOTE(chenweihang): we add suffix `@VECTOR` for std::vector<Tensor> input,
# but the string contains `@` cannot used as argument name, so we split # but the string contains `@` cannot used as argument name, so we split
# input name by `@`, and only use first substr as argument # input name by `@`, and only use first substr as argument
params_str = ','.join([p.split("@")[0].lower() for p in param_names]) params_str = ','.join([p.split("@")[0].lower() for p in param_names])
# e.g: {'X': x, 'Y': y, 'Z': z} # e.g: {'X': x, 'Y': y, 'Z': z}
......
...@@ -351,10 +351,6 @@ if '${WITH_XPU}' == 'OFF' and '${XPU_SDK_ROOT}' != '': ...@@ -351,10 +351,6 @@ if '${WITH_XPU}' == 'OFF' and '${XPU_SDK_ROOT}' != '':
### New custom op extension mechanism related ### ### New custom op extension mechanism related ###
# copy libpaddle_custom_op.so to libs on linux
if sys.platform.startswith('linux'):
shutil.copy('${PADDLE_CUSTOM_OP_SHARED_LIB}', libs_path)
package_data['paddle.libs'] += ['libpaddle_custom_op.so']
# copy paddle_custom_op.lib/paddle_custom_op.dll to libs on Windows # copy paddle_custom_op.lib/paddle_custom_op.dll to libs on Windows
if os.name == 'nt': if os.name == 'nt':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册