未验证 提交 d815fbf9 编写于 作者: A Aurelius84 提交者: GitHub

[CustomOp]Support MacOS platform and Remove libpaddle_custom_op.so dependency (#31976)

* Remove old custom OP to reduce whl package volume

* [Custom OP]Remove old custom OP to reduce whl package volume

* support macos
上级 55730d95
...@@ -367,29 +367,23 @@ endif() ...@@ -367,29 +367,23 @@ endif()
##### 2.0 New custom op extension mechanism related ##### ##### 2.0 New custom op extension mechanism related #####
# if not deps `layer`, will cause: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_ # if not deps `layer`, will cause: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer) if (WIN32)
set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)
set(PADDLE_CUSTOM_OP_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/custom_operator.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_tensor.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_op_meta_info.cc
${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc)
set(PADDLE_CUSTOM_OP_SRCS ${PADDLE_CUSTOM_OP_SRCS} PARENT_SCOPE)
cc_library(paddle_custom_op_shared set(PADDLE_CUSTOM_OP_SRCS
SHARED SRCS ${PADDLE_CUSTOM_OP_SRCS} DEPS ${PADDLE_CUSTOM_OP_MODULES}) ${CMAKE_CURRENT_SOURCE_DIR}/custom_operator.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_tensor.cc
${CMAKE_CURRENT_SOURCE_DIR}/../extension/src/ext_op_meta_info.cc
${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc)
set(PADDLE_CUSTOM_OP_SRCS ${PADDLE_CUSTOM_OP_SRCS} PARENT_SCOPE)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) cc_library(paddle_custom_op_shared
set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op) SHARED SRCS ${PADDLE_CUSTOM_OP_SRCS} DEPS ${PADDLE_CUSTOM_OP_MODULES})
target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})
if (LINUX) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
set(PADDLE_CUSTOM_OP_SHARED_LIB set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op)
${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_custom_op.so target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})
CACHE INTERNAL "Paddle custom op lib")
endif()
if (WIN32)
if("${CMAKE_GENERATOR}" STREQUAL "Ninja") if("${CMAKE_GENERATOR}" STREQUAL "Ninja")
set(paddle_custom_op_lib_path ${CMAKE_CURRENT_BINARY_DIR}) set(paddle_custom_op_lib_path ${CMAKE_CURRENT_BINARY_DIR})
else() else()
...@@ -402,9 +396,3 @@ if (WIN32) ...@@ -402,9 +396,3 @@ if (WIN32)
${paddle_custom_op_lib_path}/paddle_custom_op.dll ${paddle_custom_op_lib_path}/paddle_custom_op.dll
CACHE INTERNAL "Paddle custom op dll") CACHE INTERNAL "Paddle custom op dll")
endif() endif()
if(APPLE)
set(PADDLE_CUSTOM_OP_SHARED_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/paddle_custom_op.dylib
CACHE INTERNAL "Paddle custom op lib")
endif()
...@@ -414,12 +414,7 @@ void* GetMKLMLDsoHandle() { ...@@ -414,12 +414,7 @@ void* GetMKLMLDsoHandle() {
} }
void* GetOpDsoHandle(const std::string& dso_name) { void* GetOpDsoHandle(const std::string& dso_name) {
#if defined(__APPLE__) || defined(__OSX__)
PADDLE_THROW(platform::errors::Unimplemented(
"Create custom cpp op outside framework do not support Apple."));
#else
return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name); return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name);
#endif
} }
void* GetNvtxDsoHandle() { void* GetNvtxDsoHandle() {
......
...@@ -8,11 +8,6 @@ endforeach() ...@@ -8,11 +8,6 @@ endforeach()
add_subdirectory(unittests) add_subdirectory(unittests)
add_subdirectory(book) add_subdirectory(book)
add_subdirectory(custom_op)
# 2.0 New custom OP can support Windows/Linux now
# TODO: support 2.0 New Custom OP on Mac
if(NOT APPLE)
add_subdirectory(custom_op)
endif()
set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120) set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120)
# New custom OP can support Windows/Linux now # New custom OP can support Windows/Linux now
if(WITH_GPU) if(WITH_GPU OR APPLE)
# GPU custom op tests: compile both .cc and .cu file # GPU custom op tests: compile both .cc and .cu file
py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py) py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py) py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
......
...@@ -14,17 +14,21 @@ ...@@ -14,17 +14,21 @@
import os import os
from utils import paddle_includes, extra_compile_args from utils import paddle_includes, extra_compile_args, IS_MAC
from paddle.utils.cpp_extension import CUDAExtension, setup from paddle.utils.cpp_extension import CUDAExtension, setup, CppExtension
# Mac-CI don't support GPU
Extension = CppExtension if IS_MAC else CUDAExtension
sources = ['custom_relu_op.cc', 'custom_relu_op_dup.cc']
if not IS_MAC:
sources.append('custom_relu_op.cu')
# custom_relu_op_dup.cc is only used for multi ops test, # custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this # not a new op, if you want to test only one op, remove this
# source file # source file
setup( setup(
name='custom_relu_module_setup', name='custom_relu_module_setup',
ext_modules=CUDAExtension( # test for not specific name here. ext_modules=Extension( # test for not specific name here.
sources=[ sources=sources, # test for multi ops
'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
], # test for multi ops
include_dirs=paddle_includes, include_dirs=paddle_includes,
extra_compile_args=extra_compile_args)) extra_compile_args=extra_compile_args))
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -52,6 +52,8 @@ class TestCheckCompiler(TestABIBase): ...@@ -52,6 +52,8 @@ class TestCheckCompiler(TestABIBase):
compiler = 'g++' compiler = 'g++'
elif utils.IS_WINDOWS: elif utils.IS_WINDOWS:
compiler = 'cl' compiler = 'cl'
else:
compiler = 'clang'
# Linux: all CI gcc version > 5.4.0 # Linux: all CI gcc version > 5.4.0
# Windows: all CI MSVC version > 19.00.24215 # Windows: all CI MSVC version > 19.00.24215
...@@ -71,7 +73,7 @@ class TestCheckCompiler(TestABIBase): ...@@ -71,7 +73,7 @@ class TestCheckCompiler(TestABIBase):
self.assertTrue( self.assertTrue(
"Compiler Compatibility WARNING" in str(error[0].message)) "Compiler Compatibility WARNING" in str(error[0].message))
def test_exception(self): def test_exception_linux(self):
# clear environ # clear environ
self.del_environ() self.del_environ()
compiler = 'python' # fake command compiler = 'python' # fake command
...@@ -95,6 +97,28 @@ class TestCheckCompiler(TestABIBase): ...@@ -95,6 +97,28 @@ class TestCheckCompiler(TestABIBase):
# restore # restore
utils._expected_compiler_current_platform = raw_func utils._expected_compiler_current_platform = raw_func
def test_exception_mac(self):
# clear environ
self.del_environ()
compiler = 'python' # fake command
if utils.OS_NAME.startswith('darwin'):
def fake():
return [compiler]
# mock a fake function
raw_func = utils._expected_compiler_current_platform
utils._expected_compiler_current_platform = fake
with warnings.catch_warnings(record=True) as error:
flag = utils.check_abi_compatibility(compiler, verbose=True)
# check return True
self.assertTrue(flag)
# check ABI Compatibility without WARNING
self.assertTrue(len(error) == 0)
# restore
utils._expected_compiler_current_platform = raw_func
class TestRunCMDException(unittest.TestCase): class TestRunCMDException(unittest.TestCase):
def test_exception(self): def test_exception(self):
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...@@ -21,9 +21,9 @@ from paddle import nn ...@@ -21,9 +21,9 @@ from paddle import nn
from paddle.utils.cpp_extension import load, get_build_directory from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_MAC
# Because Windows don't use docker, the shared lib already exists in the # Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed. # cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_for_model_jit\\custom_relu_for_model_jit.pyd'.format( file = '{}\\custom_relu_for_model_jit\\custom_relu_for_model_jit.pyd'.format(
get_build_directory()) get_build_directory())
...@@ -35,9 +35,13 @@ if os.name == 'nt' and os.path.isfile(file): ...@@ -35,9 +35,13 @@ if os.name == 'nt' and os.path.isfile(file):
# custom_relu_op_dup.cc is only used for multi ops test, # custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this # not a new op, if you want to test only one op, remove this
# source file # source file
source_files = ['custom_relu_op.cc']
if not IS_MAC:
source_files.append('custom_relu_op.cu')
custom_module = load( custom_module = load(
name='custom_relu_for_model_jit', name='custom_relu_for_model_jit',
sources=['custom_relu_op.cc', 'custom_relu_op.cu'], sources=source_files,
extra_include_paths=paddle_includes, # add for Coverage CI extra_include_paths=paddle_includes, # add for Coverage CI
extra_cxx_cflags=extra_cc_args, # test for cc flags extra_cxx_cflags=extra_cc_args, # test for cc flags
extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags
...@@ -84,7 +88,7 @@ class TestDygraphModel(unittest.TestCase): ...@@ -84,7 +88,7 @@ class TestDygraphModel(unittest.TestCase):
for i in range(self.batch_num) for i in range(self.batch_num)
] ]
self.devices = ['cpu', 'gpu'] self.devices = ['cpu', 'gpu'] if not IS_MAC else ['cpu']
# for saving model # for saving model
self.model_path_template = "infer_model/custom_relu_dygaph_model_{}.pdparams" self.model_path_template = "infer_model/custom_relu_dygaph_model_{}.pdparams"
...@@ -191,7 +195,7 @@ class TestStaticModel(unittest.TestCase): ...@@ -191,7 +195,7 @@ class TestStaticModel(unittest.TestCase):
for i in range(self.batch_num) for i in range(self.batch_num)
] ]
self.devices = ['cpu', 'gpu'] self.devices = ['cpu', 'gpu'] if not IS_MAC else ['cpu']
# for saving model # for saving model
self.model_path_template = "infer_model/custom_relu_static_model_{}_{}" self.model_path_template = "infer_model/custom_relu_static_model_{}_{}"
......
...@@ -18,10 +18,10 @@ import paddle ...@@ -18,10 +18,10 @@ import paddle
import numpy as np import numpy as np
from paddle.utils.cpp_extension import load, get_build_directory from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_WINDOWS from utils import paddle_includes, extra_cc_args, extra_nvcc_args, IS_WINDOWS, IS_MAC
from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static
# Because Windows don't use docker, the shared lib already exists in the # Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed. # cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format( file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format(
get_build_directory()) get_build_directory())
...@@ -33,11 +33,13 @@ if os.name == 'nt' and os.path.isfile(file): ...@@ -33,11 +33,13 @@ if os.name == 'nt' and os.path.isfile(file):
# custom_relu_op_dup.cc is only used for multi ops test, # custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this # not a new op, if you want to test only one op, remove this
# source file # source file
sources = ['custom_relu_op.cc', 'custom_relu_op_dup.cc']
if not IS_MAC:
sources.append('custom_relu_op.cu')
custom_module = load( custom_module = load(
name='custom_relu_module_jit', name='custom_relu_module_jit',
sources=[ sources=sources,
'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
],
extra_include_paths=paddle_includes, # add for Coverage CI extra_include_paths=paddle_includes, # add for Coverage CI
extra_cxx_cflags=extra_cc_args, # test for cc flags extra_cxx_cflags=extra_cc_args, # test for cc flags
extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags extra_cuda_cflags=extra_nvcc_args, # test for nvcc flags
...@@ -112,6 +114,9 @@ class TestJITLoad(unittest.TestCase): ...@@ -112,6 +114,9 @@ class TestJITLoad(unittest.TestCase):
self.assertTrue(caught_exception) self.assertTrue(caught_exception)
caught_exception = False caught_exception = False
# MAC-CI don't support GPU
if IS_MAC:
return
try: try:
x = np.random.uniform(-1, 1, [4, 8]).astype('int32') x = np.random.uniform(-1, 1, [4, 8]).astype('int32')
custom_relu_dynamic(custom_module.custom_relu, 'gpu', 'int32', x) custom_relu_dynamic(custom_module.custom_relu, 'gpu', 'int32', x)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
......
...@@ -13,10 +13,13 @@ ...@@ -13,10 +13,13 @@
# limitations under the License. # limitations under the License.
import os import os
import sys
import six import six
from distutils.sysconfig import get_python_lib from distutils.sysconfig import get_python_lib
from paddle.utils.cpp_extension.extension_utils import IS_WINDOWS from paddle.utils.cpp_extension.extension_utils import IS_WINDOWS
IS_MAC = sys.platform.startswith('darwin')
site_packages_path = get_python_lib() site_packages_path = get_python_lib()
# Note(Aurelius84): We use `add_test` in Cmake to config how to run unittest in CI. # Note(Aurelius84): We use `add_test` in Cmake to config how to run unittest in CI.
# `PYTHONPATH` will be set as `build/python/paddle` that will make no way to find # `PYTHONPATH` will be set as `build/python/paddle` that will make no way to find
......
...@@ -22,14 +22,15 @@ from setuptools.command.easy_install import easy_install ...@@ -22,14 +22,15 @@ from setuptools.command.easy_install import easy_install
from setuptools.command.build_ext import build_ext from setuptools.command.build_ext import build_ext
from distutils.command.build import build from distutils.command.build import build
from .extension_utils import find_cuda_home, find_rocm_home, normalize_extension_kwargs, add_compile_flag from .extension_utils import find_cuda_home, find_rocm_home, normalize_extension_kwargs, add_compile_flag, run_cmd
from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags
from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile
from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from
from .extension_utils import clean_object_if_change_cflags from .extension_utils import clean_object_if_change_cflags, _reset_so_rpath
from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat
from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS, MSVC_COMPILE_FLAGS from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS, MSVC_COMPILE_FLAGS
from .extension_utils import CLANG_COMPILE_FLAGS, CLANG_LINK_FLAGS
from ...fluid import core from ...fluid import core
...@@ -50,14 +51,14 @@ else: ...@@ -50,14 +51,14 @@ else:
def setup(**attr): def setup(**attr):
""" """
The interface is used to config the process of compiling customized operators, The interface is used to config the process of compiling customized operators,
mainly includes how to complile shared library, automatically generate python API mainly includes how to compile shared library, automatically generate python API
and install it into site-package. It supports using customized operators directly with and install it into site-package. It supports using customized operators directly with
``import`` statement. ``import`` statement.
It encapsulates the python built-in ``setuptools.setup`` function and keeps arguments It encapsulates the python built-in ``setuptools.setup`` function and keeps arguments
and usage same as the native interface. Meanwhile, it hiddens Paddle inner framework and usage same as the native interface. Meanwhile, it hiddens Paddle inner framework
concepts, such as necessary compiling flags, included paths of head files, and linking concepts, such as necessary compiling flags, included paths of head files, and linking
flags. It also will automatically search and valid local enviromment and versions of flags. It also will automatically search and valid local environment and versions of
``cc(Linux)`` , ``cl.exe(Windows)`` and ``nvcc`` , then compiles customized operators ``cc(Linux)`` , ``cl.exe(Windows)`` and ``nvcc`` , then compiles customized operators
supporting CPU or GPU device according to the specified Extension type. supporting CPU or GPU device according to the specified Extension type.
...@@ -67,7 +68,7 @@ def setup(**attr): ...@@ -67,7 +68,7 @@ def setup(**attr):
For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2, For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2,
then the version of user's local machine should satisfy GCC >= 8.2. then the version of user's local machine should satisfy GCC >= 8.2.
For Windows, Visual Studio version will be checked, and it shoule be greater than or equal to that of For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of
PaddlePaddle (Visual Studio 2015 update3). PaddlePaddle (Visual Studio 2015 update3).
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility. occur because of ABI compatibility.
...@@ -130,7 +131,7 @@ def setup(**attr): ...@@ -130,7 +131,7 @@ def setup(**attr):
ext_modules(Extension): Specify the Extension instance including customized operator source files, compiling flags et.al. ext_modules(Extension): Specify the Extension instance including customized operator source files, compiling flags et.al.
If only compile operator supporting CPU device, please use ``CppExtension`` ; If compile operator If only compile operator supporting CPU device, please use ``CppExtension`` ; If compile operator
supporting CPU and GPU devices, please use ``CUDAExtension`` . supporting CPU and GPU devices, please use ``CUDAExtension`` .
include_dirs(list[str], optional): Specify the extra include directoies to search head files. The interface will automatically add include_dirs(list[str], optional): Specify the extra include directories to search head files. The interface will automatically add
``site-package/paddle/include`` . Please add the corresponding directory path if including third-party ``site-package/paddle/include`` . Please add the corresponding directory path if including third-party
head files. Default is None. head files. Default is None.
extra_compile_args(list[str] | dict, optional): Specify the extra compiling flags such as ``-O3`` . If set ``list[str]`` , all these flags extra_compile_args(list[str] | dict, optional): Specify the extra compiling flags such as ``-O3`` . If set ``list[str]`` , all these flags
...@@ -158,7 +159,7 @@ def setup(**attr): ...@@ -158,7 +159,7 @@ def setup(**attr):
setup(name='custom_module', setup(name='custom_module',
ext_modules=CUDAExtension( ext_modules=CUDAExtension(
sources=['relu_op.cc', 'relu_op.cu']) sources=['relu_op.cc', 'relu_op.cu'])
# After running `python setup.py install` # After running `python setup.py install`
from custom_module import relu from custom_module import relu
""" """
...@@ -209,7 +210,7 @@ def CppExtension(sources, *args, **kwargs): ...@@ -209,7 +210,7 @@ def CppExtension(sources, *args, **kwargs):
Op Kernel only supporting CPU device. Please use ``CUDAExtension`` if you want to Op Kernel only supporting CPU device. Please use ``CUDAExtension`` if you want to
compile Op Kernel that supports both CPU and GPU devices. compile Op Kernel that supports both CPU and GPU devices.
It furtherly encapsulates python built-in ``setuptools.Extension`` .The arguments and It further encapsulates python built-in ``setuptools.Extension`` .The arguments and
usage are same as the native interface, except for no need to explicitly specify usage are same as the native interface, except for no need to explicitly specify
``name`` . ``name`` .
...@@ -259,7 +260,7 @@ def CUDAExtension(sources, *args, **kwargs): ...@@ -259,7 +260,7 @@ def CUDAExtension(sources, *args, **kwargs):
Op Kernel supporting both CPU and GPU devices. Please use ``CppExtension`` if you want to Op Kernel supporting both CPU and GPU devices. Please use ``CppExtension`` if you want to
compile Op Kernel that supports only CPU device. compile Op Kernel that supports only CPU device.
It furtherly encapsulates python built-in ``setuptools.Extension`` .The arguments and It further encapsulates python built-in ``setuptools.Extension`` .The arguments and
usage are same as the native interface, except for no need to explicitly specify usage are same as the native interface, except for no need to explicitly specify
``name`` . ``name`` .
...@@ -367,11 +368,14 @@ class BuildExtension(build_ext, object): ...@@ -367,11 +368,14 @@ class BuildExtension(build_ext, object):
self.build_lib = self.output_dir self.build_lib = self.output_dir
def build_extensions(self): def build_extensions(self):
if OS_NAME.startswith("darwin"):
self._valid_clang_compiler()
self._check_abi() self._check_abi()
# Note(Aurelius84): If already compiling source before, we should check whether # Note(Aurelius84): If already compiling source before, we should check whether
# cflags have changed and delete the built shared library to re-compile the source # cflags have changed and delete the built shared library to re-compile the source
# even though source file content keep unchanaged. # even though source file content keep unchanged.
so_name = self.get_ext_fullpath(self.extensions[0].name) so_name = self.get_ext_fullpath(self.extensions[0].name)
clean_object_if_change_cflags( clean_object_if_change_cflags(
os.path.abspath(so_name), self.extensions[0]) os.path.abspath(so_name), self.extensions[0])
...@@ -397,17 +401,21 @@ class BuildExtension(build_ext, object): ...@@ -397,17 +401,21 @@ class BuildExtension(build_ext, object):
cflags = copy.deepcopy(extra_postargs) cflags = copy.deepcopy(extra_postargs)
try: try:
original_compiler = self.compiler.compiler_so original_compiler = self.compiler.compiler_so
# ncvv compile CUDA source # nvcc compile CUDA source
if is_cuda_file(src): if is_cuda_file(src):
if core.is_compiled_with_rocm(): if core.is_compiled_with_rocm():
assert ROCM_HOME is not None, "Not found ROCM runtime, please use `export ROCM_PATH= XXX` to specific it." assert ROCM_HOME is not None, "Not found ROCM runtime, \
please use `export ROCM_PATH= XXX` to specify it."
hipcc_cmd = os.path.join(ROCM_HOME, 'bin', 'hipcc') hipcc_cmd = os.path.join(ROCM_HOME, 'bin', 'hipcc')
self.compiler.set_executable('compiler_so', hipcc_cmd) self.compiler.set_executable('compiler_so', hipcc_cmd)
# {'nvcc': {}, 'cxx: {}} # {'nvcc': {}, 'cxx: {}}
if isinstance(cflags, dict): if isinstance(cflags, dict):
cflags = cflags['hipcc'] cflags = cflags['hipcc']
else: else:
assert CUDA_HOME is not None, "Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it." assert CUDA_HOME is not None, "Not found CUDA runtime, \
please use `export CUDA_HOME= XXX` to specify it."
nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc') nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc')
self.compiler.set_executable('compiler_so', nvcc_cmd) self.compiler.set_executable('compiler_so', nvcc_cmd)
# {'nvcc': {}, 'cxx: {}} # {'nvcc': {}, 'cxx: {}}
...@@ -424,7 +432,7 @@ class BuildExtension(build_ext, object): ...@@ -424,7 +432,7 @@ class BuildExtension(build_ext, object):
original_compile(obj, src, ext, cc_args, cflags, pp_opts) original_compile(obj, src, ext, cc_args, cflags, pp_opts)
finally: finally:
# restore original_compiler # restore original_compiler
self.compiler.compiler_so = original_compiler self.compiler.set_executable('compiler_so', original_compiler)
def win_custom_single_compiler(sources, def win_custom_single_compiler(sources,
output_dir=None, output_dir=None,
...@@ -470,7 +478,9 @@ class BuildExtension(build_ext, object): ...@@ -470,7 +478,9 @@ class BuildExtension(build_ext, object):
src = src_list[0] src = src_list[0]
obj = obj_list[0] obj = obj_list[0]
if is_cuda_file(src): if is_cuda_file(src):
assert CUDA_HOME is not None, "Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it." assert CUDA_HOME is not None, "Not found CUDA runtime, \
please use `export CUDA_HOME= XXX` to specify it."
nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc') nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc')
if isinstance(self.cflags, dict): if isinstance(self.cflags, dict):
cflags = self.cflags['nvcc'] cflags = self.cflags['nvcc']
...@@ -548,22 +558,42 @@ class BuildExtension(build_ext, object): ...@@ -548,22 +558,42 @@ class BuildExtension(build_ext, object):
print("Compiling user custom op, it will cost a few seconds.....") print("Compiling user custom op, it will cost a few seconds.....")
build_ext.build_extensions(self) build_ext.build_extensions(self)
# Reset runtime library path on MacOS platform
so_path = self.get_ext_fullpath(self.extensions[0]._full_name)
_reset_so_rpath(so_path)
def get_ext_filename(self, fullname): def get_ext_filename(self, fullname):
# for example: custommed_extension.cpython-37m-x86_64-linux-gnu.so # for example: custommed_extension.cpython-37m-x86_64-linux-gnu.so
ext_name = super(BuildExtension, self).get_ext_filename(fullname) ext_name = super(BuildExtension, self).get_ext_filename(fullname)
split_str = '.'
name_items = ext_name.split(split_str)
if self.no_python_abi_suffix and six.PY3: if self.no_python_abi_suffix and six.PY3:
split_str = '.'
name_items = ext_name.split(split_str)
assert len( assert len(
name_items name_items
) > 2, "Expected len(name_items) > 2, but received {}".format( ) > 2, "Expected len(name_items) > 2, but received {}".format(
len(name_items)) len(name_items))
name_items.pop(-2) name_items.pop(-2)
# custommed_extension.so
ext_name = split_str.join(name_items) ext_name = split_str.join(name_items)
# custommed_extension.dylib
if OS_NAME.startswith('darwin'):
name_items[-1] = 'dylib'
ext_name = split_str.join(name_items)
return ext_name return ext_name
def _valid_clang_compiler(self):
"""
Make sure to use Clang as compiler on Mac platform
"""
compiler_infos = ['clang'] + CLANG_COMPILE_FLAGS
linker_infos = ['clang'] + CLANG_LINK_FLAGS
self.compiler.set_executables(
compiler=compiler_infos,
compiler_so=compiler_infos,
compiler_cxx=['clang'],
linker_exe=['clang'],
linker_so=linker_infos)
def _check_abi(self): def _check_abi(self):
""" """
Check ABI Compatibility. Check ABI Compatibility.
...@@ -628,6 +658,8 @@ class EasyInstallCommand(easy_install, object): ...@@ -628,6 +658,8 @@ class EasyInstallCommand(easy_install, object):
will_rename = False will_rename = False
if OS_NAME.startswith('linux') and ext == '.so': if OS_NAME.startswith('linux') and ext == '.so':
will_rename = True will_rename = True
elif OS_NAME.startswith('darwin') and ext == '.dylib':
will_rename = True
elif IS_WINDOWS and ext == '.pyd': elif IS_WINDOWS and ext == '.pyd':
will_rename = True will_rename = True
...@@ -702,7 +734,7 @@ def load(name, ...@@ -702,7 +734,7 @@ def load(name,
For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2, For Linux, GCC version will be checked . For example if Paddle with CUDA 10.1 is built with GCC 8.2,
then the version of user's local machine should satisfy GCC >= 8.2. then the version of user's local machine should satisfy GCC >= 8.2.
For Windows, Visual Studio version will be checked, and it shoule be greater than or equal to that of For Windows, Visual Studio version will be checked, and it should be greater than or equal to that of
PaddlePaddle (Visual Studio 2015 update3). PaddlePaddle (Visual Studio 2015 update3).
If the above conditions are not met, the corresponding warning will be printed, and a fatal error may If the above conditions are not met, the corresponding warning will be printed, and a fatal error may
occur because of ABI compatibility. occur because of ABI compatibility.
...@@ -729,7 +761,7 @@ def load(name, ...@@ -729,7 +761,7 @@ def load(name,
custom_op_module = load( custom_op_module = load(
name="op_shared_libary_name", # name of shared library name="op_shared_libary_name", # name of shared library
sources=['relu_op.cc', 'relu_op.cu'], # source files of cusomized op sources=['relu_op.cc', 'relu_op.cu'], # source files of customized op
extra_cxx_cflags=['-g', '-w'], # optional, specify extra flags to compile .cc/.cpp file extra_cxx_cflags=['-g', '-w'], # optional, specify extra flags to compile .cc/.cpp file
extra_cuda_cflags=['-O2'], # optional, specify extra flags to compile .cu file extra_cuda_cflags=['-O2'], # optional, specify extra flags to compile .cu file
verbose=True # optional, specify to output log information verbose=True # optional, specify to output log information
...@@ -761,7 +793,7 @@ def load(name, ...@@ -761,7 +793,7 @@ def load(name,
verbose(bool, optional): whether to verbose compiled log information. Default is False verbose(bool, optional): whether to verbose compiled log information. Default is False
Returns: Returns:
Moudle: A callable python module contains all CustomOp Layer APIs. Module: A callable python module contains all CustomOp Layer APIs.
""" """
......
...@@ -44,6 +44,13 @@ MSVC_COMPILE_FLAGS = [ ...@@ -44,6 +44,13 @@ MSVC_COMPILE_FLAGS = [
'/wd4190', '/EHsc', '/w', '/DGOOGLE_GLOG_DLL_DECL', '/wd4190', '/EHsc', '/w', '/DGOOGLE_GLOG_DLL_DECL',
'/DBOOST_HAS_STATIC_ASSERT', '/DNDEBUG', '/DPADDLE_USE_DSO' '/DBOOST_HAS_STATIC_ASSERT', '/DNDEBUG', '/DPADDLE_USE_DSO'
] ]
# Compile flags handed to Apple Clang when building custom ops on macOS
# (mirrors CPython's own x86_64 extension-build flags).
CLANG_COMPILE_FLAGS = [
    '-fno-common', '-dynamic', '-DNDEBUG', '-g', '-fwrapv', '-O3', '-arch',
    'x86_64'
]
# Link flags for producing a .dylib on macOS; `-undefined dynamic_lookup`
# defers unresolved-symbol resolution until the library is loaded.
CLANG_LINK_FLAGS = [
    '-dynamiclib', '-undefined', 'dynamic_lookup', '-arch', 'x86_64'
]
MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib'] MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_custom_op.lib']
...@@ -247,7 +254,7 @@ class VersionManager: ...@@ -247,7 +254,7 @@ class VersionManager:
def combine_hash(md5, value): def combine_hash(md5, value):
""" """
Return new hash value. Return new hash value.
DO NOT use `hash()` beacuse it doesn't generate stable value between different process. DO NOT use `hash()` because it doesn't generate stable value between different process.
See https://stackoverflow.com/questions/27522626/hash-function-in-python-3-3-returns-different-results-between-sessions See https://stackoverflow.com/questions/27522626/hash-function-in-python-3-3-returns-different-results-between-sessions
""" """
md5.update(repr(value).encode()) md5.update(repr(value).encode())
...@@ -286,13 +293,13 @@ def clean_object_if_change_cflags(so_path, extension): ...@@ -286,13 +293,13 @@ def clean_object_if_change_cflags(so_path, extension):
if os.path.exists(so_path) and os.path.exists(version_file): if os.path.exists(so_path) and os.path.exists(version_file):
old_version_info = deserialize(version_file) old_version_info = deserialize(version_file)
so_version = old_version_info.get(so_name, None) so_version = old_version_info.get(so_name, None)
# delete shared library file if versison is changed to re-compile it. # delete shared library file if version is changed to re-compile it.
if so_version is not None and so_version != versioner.version: if so_version is not None and so_version != versioner.version:
log_v( log_v(
"Re-Compiling {}, because specified cflags have been changed. New signature {} has been saved into {}.". "Re-Compiling {}, because specified cflags have been changed. New signature {} has been saved into {}.".
format(so_name, versioner.version, version_file)) format(so_name, versioner.version, version_file))
os.remove(so_path) os.remove(so_path)
# upate new version information # update new version information
new_version_info = versioner.details new_version_info = versioner.details
new_version_info[so_name] = versioner.version new_version_info[so_name] = versioner.version
serialize(version_file, new_version_info) serialize(version_file, new_version_info)
...@@ -348,6 +355,54 @@ def get_cuda_arch_flags(cflags): ...@@ -348,6 +355,54 @@ def get_cuda_arch_flags(cflags):
return [] return []
def _get_fluid_path():
    """
    Return the absolute path of the installed `paddle/fluid` directory.
    """
    import paddle
    paddle_dir = os.path.dirname(paddle.__file__)
    return os.path.join(paddle_dir, 'fluid')
def _get_core_name():
    """
    Return the pybind DSO module filename: 'core_noavx.so' when the
    no-AVX core was loaded, otherwise 'core_avx.so'.
    """
    import paddle
    noavx_loaded = paddle.fluid.core.load_noavx
    return 'core_noavx.so' if noavx_loaded else 'core_avx.so'
def _get_lib_core_path():
    """
    Return real path of libcore_(no)avx.dylib on MacOS.
    """
    # e.g. 'core_avx.so' -> 'libcore_avx.dylib' (strip '.so', add lib/dylib)
    core_so_name = _get_core_name()
    dylib_name = "lib{}.dylib".format(core_so_name[:-3])
    return os.path.join(_get_fluid_path(), dylib_name)
def _reset_so_rpath(so_path):
    """
    Rewrite the runtime load path recorded in a freshly-built custom-op dylib.

    NOTE(Aurelius84): The runtime path of core_(no)avx.so was set to
    `@loader_path/../libs` in setup.py.in. When loading a custom op,
    `@loader_path` resolves to the custom op's own directory rather than
    `paddle/fluid`, so we rewrite that entry to `@rpath/<core name>` so the
    dynamic loader can locate it via the rpath added at link time
    (`-rpath site-packages/paddle/fluid`), removing any need to set
    `LD_LIBRARY_PATH`.
    """
    assert os.path.exists(so_path)
    # install_name_tool only exists (and is only needed) on macOS.
    if OS_NAME.startswith("darwin"):
        old_entry = "@loader_path/../libs/"
        new_entry = "@rpath/{}".format(_get_core_name())
        run_cmd('install_name_tool -change {} {} {}'.format(
            old_entry, new_entry, so_path))
def normalize_extension_kwargs(kwargs, use_cuda=False): def normalize_extension_kwargs(kwargs, use_cuda=False):
""" """
Normalize include_dirs, library_dir and other attributes in kwargs. Normalize include_dirs, library_dir and other attributes in kwargs.
...@@ -381,15 +436,28 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): ...@@ -381,15 +436,28 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib']) extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
kwargs['extra_link_args'] = extra_link_args kwargs['extra_link_args'] = extra_link_args
else: else:
########################### Linux Platform ###########################
extra_link_args = kwargs.get('extra_link_args', [])
# On Linux, GCC support '-l:xxx.so' to specify the library name
# without `lib` prefix.
if OS_NAME.startswith('linux'):
extra_link_args.append('-l:{}'.format(_get_core_name()))
########################### MacOS Platform ###########################
else:
# See _reset_so_rpath for details.
extra_link_args.append('-Wl,-rpath,{}'.format(_get_fluid_path()))
# On MacOS, ld don't support `-l:xx`, so we create a
# libcore_avx.dylib symbol link.
lib_core_name = create_sym_link_if_not_exist()
extra_link_args.append('-l{}'.format(lib_core_name))
########################### -- END -- ###########################
add_compile_flag(extra_compile_args, ['-w']) # disable warning add_compile_flag(extra_compile_args, ['-w']) # disable warning
# Note(Aurelius84): This marco will impact memory layout of `Tensor`. # Note(Aurelius84): This marco will impact memory layout of `Tensor`.
# We align it automatially with pre-installed Paddle. # We align it automatically with pre-installed Paddle.
if core.is_compiled_with_mkldnn(): if core.is_compiled_with_mkldnn():
add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_MKLDNN']) add_compile_flag(extra_compile_args, ['-DPADDLE_WITH_MKLDNN'])
# append link flags
extra_link_args = kwargs.get('extra_link_args', [])
extra_link_args.append('-lpaddle_custom_op')
if use_cuda: if use_cuda:
extra_link_args.append('-lcudart') extra_link_args.append('-lcudart')
...@@ -406,6 +474,30 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): ...@@ -406,6 +474,30 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
return kwargs return kwargs
def create_sym_link_if_not_exist():
    """
    Create soft symbol link of `core_avx.so` or `core_noavx.so`.

    macOS `ld` does not support the GNU `-l:core_avx.so` form, so a
    `libcore_(no)avx.dylib` symlink is created next to the real .so to make
    `-lcore_(no)avx` style linking work.

    Returns the core library name without its '.so' suffix, suitable for a
    `-l<name>` linker argument.
    """
    assert OS_NAME.startswith('darwin')

    core_so_name = _get_core_name()
    src_path = os.path.join(_get_fluid_path(), core_so_name)
    dst_path = _get_lib_core_path()

    if not os.path.exists(dst_path):
        try:
            os.symlink(src_path, dst_path)
            assert os.path.exists(dst_path)
        except Exception:
            raise RuntimeError(
                "Failed to create soft symbol link for {}.\n Please execute the following command manually: `ln -s {} {}`".
                format(core_so_name, src_path, dst_path))

    # core_avx or core_noavx without suffix
    return core_so_name[:-3]
def find_cuda_home(): def find_cuda_home():
""" """
Use heuristic method to find cuda path Use heuristic method to find cuda path
...@@ -518,6 +610,11 @@ def find_paddle_includes(use_cuda=False): ...@@ -518,6 +610,11 @@ def find_paddle_includes(use_cuda=False):
cuda_include_dir = find_cuda_includes() cuda_include_dir = find_cuda_includes()
include_dirs.extend(cuda_include_dir) include_dirs.extend(cuda_include_dir)
if OS_NAME.startswith('darwin'):
# NOTE(Aurelius84): Ensure to find std v1 headers correctly.
std_v1_includes = '/Library/Developer/CommandLineTools/usr/include/c++/v1/'
include_dirs.append(std_v1_includes)
return include_dirs return include_dirs
...@@ -567,6 +664,9 @@ def find_paddle_libraries(use_cuda=False): ...@@ -567,6 +664,9 @@ def find_paddle_libraries(use_cuda=False):
cuda_lib_dir = find_cuda_libraries() cuda_lib_dir = find_cuda_libraries()
paddle_lib_dirs.extend(cuda_lib_dir) paddle_lib_dirs.extend(cuda_lib_dir)
# add `paddle/fluid` to search `core_avx.so` or `core_noavx.so`
paddle_lib_dirs.append(_get_fluid_path())
return paddle_lib_dirs return paddle_lib_dirs
...@@ -614,9 +714,6 @@ def get_build_directory(verbose=False): ...@@ -614,9 +714,6 @@ def get_build_directory(verbose=False):
if IS_WINDOWS: if IS_WINDOWS:
root_extensions_directory = os.path.normpath( root_extensions_directory = os.path.normpath(
root_extensions_directory) root_extensions_directory)
elif OS_NAME.startswith('darwin'):
# TODO(Aurelius84): consider macOs
raise NotImplementedError("Not support Mac now.")
log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.". log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.".
format(root_extensions_directory), verbose) format(root_extensions_directory), verbose)
...@@ -654,6 +751,8 @@ def _import_module_from_library(module_name, build_directory, verbose=False): ...@@ -654,6 +751,8 @@ def _import_module_from_library(module_name, build_directory, verbose=False):
""" """
if IS_WINDOWS: if IS_WINDOWS:
dynamic_suffix = '.pyd' dynamic_suffix = '.pyd'
elif OS_NAME.startswith('darwin'):
dynamic_suffix = '.dylib'
else: else:
dynamic_suffix = '.so' dynamic_suffix = '.so'
ext_path = os.path.join(build_directory, module_name + dynamic_suffix) ext_path = os.path.join(build_directory, module_name + dynamic_suffix)
...@@ -708,7 +807,7 @@ def _custom_api_content(op_name): ...@@ -708,7 +807,7 @@ def _custom_api_content(op_name):
# Set 'float32' temporarily, and the actual dtype of output variable will be inferred # Set 'float32' temporarily, and the actual dtype of output variable will be inferred
# in runtime. # in runtime.
outs[out_name] = helper.create_variable(dtype='float32') outs[out_name] = helper.create_variable(dtype='float32')
helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs) helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs)
res = [outs[out_name] for out_name in out_names] res = [outs[out_name] for out_name in out_names]
...@@ -757,7 +856,7 @@ def _get_api_inputs_str(op_name): ...@@ -757,7 +856,7 @@ def _get_api_inputs_str(op_name):
# e.g: x, y, z # e.g: x, y, z
param_names = in_names + attr_names param_names = in_names + attr_names
# NOTE(chenweihang): we add suffix `@VECTOR` for std::vector<Tensor> input, # NOTE(chenweihang): we add suffix `@VECTOR` for std::vector<Tensor> input,
# but the string contains `@` cannot used as argument name, so we split # but the string contains `@` cannot used as argument name, so we split
# input name by `@`, and only use first substr as argument # input name by `@`, and only use first substr as argument
params_str = ','.join([p.split("@")[0].lower() for p in param_names]) params_str = ','.join([p.split("@")[0].lower() for p in param_names])
# e.g: {'X': x, 'Y': y, 'Z': z} # e.g: {'X': x, 'Y': y, 'Z': z}
......
...@@ -351,10 +351,6 @@ if '${WITH_XPU}' == 'OFF' and '${XPU_SDK_ROOT}' != '': ...@@ -351,10 +351,6 @@ if '${WITH_XPU}' == 'OFF' and '${XPU_SDK_ROOT}' != '':
### New custom op extension mechanism related ### ### New custom op extension mechanism related ###
# copy libpaddle_custom_op.so to libs on linux
if sys.platform.startswith('linux'):
shutil.copy('${PADDLE_CUSTOM_OP_SHARED_LIB}', libs_path)
package_data['paddle.libs'] += ['libpaddle_custom_op.so']
# copy paddle_custom_op.lib/paddle_custom_op.dll to libs on Windows # copy paddle_custom_op.lib/paddle_custom_op.dll to libs on Windows
if os.name == 'nt': if os.name == 'nt':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册