From 4261ae34291ff80bc8f9579948d59cc8df55b16e Mon Sep 17 00:00:00 2001
From: Chen Weihang <chenweihang@baidu.com>
Date: Mon, 19 Sep 2022 10:16:13 +0800
Subject: [PATCH] Unify core avx and core_noavx to libpaddle (#46095) (#46113)

* unify core_avx and core_noavx

* fix except error

* revert mac compile logic

* revert dylib to so

* add core_noavx branch

* remove core_noavx

* replace paddle_core by lib paddle

* polish var name

* replace paddle_core by libpaddle

* update custom device commit

* polish code by comments
---
 paddle/fluid/pybind/CMakeLists.txt            |  14 +-
 paddle/fluid/pybind/pybind.cc                 |  16 +-
 paddle/scripts/paddle_build.sh                |  41 ++--
 paddle/scripts/windows_build/build.bat        |   6 +-
 python/CMakeLists.txt                         |  71 +------
 python/paddle/fluid/core.py                   | 191 ++++++------------
 .../custom_kernel_dot_c_setup.py              |   4 +-
 .../custom_kernel/custom_kernel_dot_setup.py  |   4 +-
 .../fluid/tests/custom_runtime/CMakeLists.txt |   2 +-
 .../utils/cpp_extension/cpp_extension.py      |   2 +-
 .../utils/cpp_extension/extension_utils.py    |  19 +-
 python/setup.py.in                            |   7 +-
 tools/print_signatures.py                     |   2 +-
 13 files changed, 135 insertions(+), 244 deletions(-)

diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index 72885c0bbe5..354f48c7bb9 100755
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -573,24 +573,24 @@ if(WITH_PYTHON)
   endif()
 
   cc_library(
-    paddle_pybind SHARED
+    libpaddle SHARED
     SRCS ${PYBIND_SRCS}
     DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
 
   if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
-    add_dependencies(paddle_pybind legacy_eager_codegen)
-    add_dependencies(paddle_pybind eager_legacy_op_function_generator_cmd)
+    add_dependencies(libpaddle legacy_eager_codegen)
+    add_dependencies(libpaddle eager_legacy_op_function_generator_cmd)
   endif()
 
   if(NOT APPLE AND NOT WIN32)
-    target_link_libraries(paddle_pybind rt)
+    target_link_libraries(libpaddle rt)
   endif()
 
   if(WITH_ROCM)
-    target_link_libraries(paddle_pybind ${ROCM_HIPRTC_LIB})
+    target_link_libraries(libpaddle ${ROCM_HIPRTC_LIB})
   endif()
 
   get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
-  target_link_libraries(paddle_pybind ${os_dependency_modules})
-  add_dependencies(paddle_pybind op_function_generator_cmd)
+  target_link_libraries(libpaddle ${os_dependency_modules})
+  add_dependencies(libpaddle op_function_generator_cmd)
 endif()
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 2a8e7e059c9..1421b823cda 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -205,6 +205,14 @@ PyTypeObject *g_framework_scope_pytype = nullptr;
 PyTypeObject *g_framework_lodtensorarray_pytype = nullptr;
 PyTypeObject *g_custom_op_kernel_ctx_pytype = nullptr;
 
+bool IsCompiledWithAVX() {
+#ifndef PADDLE_WITH_AVX
+  return false;
+#else
+  return true;
+#endif
+}
+
 bool IsCompiledWithCUDA() {
 #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
   return false;
@@ -576,12 +584,7 @@ static int GetNCCLVersion() {
 }
 #endif
 
-#ifdef PADDLE_WITH_AVX
-PYBIND11_MODULE(core_avx, m) {
-#else
-PYBIND11_MODULE(core_noavx, m) {
-#endif
-
+PYBIND11_MODULE(libpaddle, m) {
   BindImperative(&m);
   BindEager(&m);
   BindEagerStringTensor(&m);
@@ -1705,6 +1708,7 @@ All parameter, weight, gradient are variables in Paddle.
   });
   m.def("init_default_kernel_signatures",
         []() { framework::InitDefaultKernelSignatureMap(); });
+  m.def("is_compiled_with_avx", IsCompiledWithAVX);
   m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
   m.def("is_compiled_with_ascend", IsCompiledWithAscend);
   m.def("is_compiled_with_rocm", IsCompiledWithROCM);
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index ddb888dfaa0..6fd5537ab0d 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -272,7 +272,6 @@ EOF
         -DWITH_DISTRIBUTE=${distibuted_flag} \
         -DWITH_MKL=${WITH_MKL:-ON} \
         -DWITH_AVX=${WITH_AVX:-OFF} \
-        -DNOAVX_CORE_FILE=${NOAVX_CORE_FILE:-""} \
         -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
         -DNEW_RELEASE_PYPI=${NEW_RELEASE_PYPI:-OFF} \
         -DNEW_RELEASE_ALL=${NEW_RELEASE_ALL:-OFF} \
@@ -546,23 +545,26 @@ EOF
 }
 
 
-function combine_avx_noavx_build() {
-    mkdir -p ${PADDLE_ROOT}/build.noavx
-    cd ${PADDLE_ROOT}/build.noavx
-    WITH_AVX=OFF
+function avx_build() {
+    mkdir -p ${PADDLE_ROOT}/build
+    cd ${PADDLE_ROOT}/build
+    WITH_AVX=ON
+
     cmake_base ${PYTHON_ABI:-""}
     build_base
+}
 
-    # build combined one
+
+function noavx_build() {
     mkdir -p ${PADDLE_ROOT}/build
     cd ${PADDLE_ROOT}/build
-    NOAVX_CORE_FILE=`find ${PADDLE_ROOT}/build.noavx/python/paddle/fluid/ -name "core_noavx.*"`
-    WITH_AVX=ON
+    WITH_AVX=OFF
 
     cmake_base ${PYTHON_ABI:-""}
     build_base
 }
 
+
 function mac_m1_arm_build() {
     mkdir -p ${PADDLE_ROOT}/build
     cd ${PADDLE_ROOT}/build
@@ -2887,12 +2889,12 @@ EOF
     local LIB_TYPE=$1
     case $LIB_TYPE in
       full)
-        # Build full Paddle Python module. Will timeout without caching 'copy_paddle_pybind' first
-        make -j `nproc` framework_py_proto copy_paddle_pybind paddle_python
+        # Build full Paddle Python module. Will timeout without caching 'copy_libpaddle' first
+        make -j `nproc` framework_py_proto copy_libpaddle paddle_python
         ;;
       pybind)
         # Build paddle pybind library. Takes 49 minutes to build. Might timeout
-        make -j `nproc` copy_paddle_pybind
+        make -j `nproc` copy_libpaddle
         ;;
       proto)
         # Even smaller library.
@@ -3485,16 +3487,25 @@ function main() {
         gen_dockerfile ${PYTHON_ABI:-""}
         assert_api_spec_approvals
         ;;
-      combine_avx_noavx)
-        combine_avx_noavx_build
+      avx_build)
+        avx_build
+        gen_dockerfile ${PYTHON_ABI:-""}
+        ;;
+      noavx_build)
+        noavx_build
         gen_dockerfile ${PYTHON_ABI:-""}
         ;;
       mac_m1_arm)
         mac_m1_arm_build
         gen_dockerfile ${PYTHON_ABI:-""}
         ;;
-      combine_avx_noavx_build_and_test)
-        combine_avx_noavx_build
+      avx_build_and_test)
+        avx_build
+        gen_dockerfile ${PYTHON_ABI:-""}
+        parallel_test_base
+        ;;
+      noavx_build_and_test)
+        noavx_build
         gen_dockerfile ${PYTHON_ABI:-""}
         parallel_test_base
         ;;
diff --git a/paddle/scripts/windows_build/build.bat b/paddle/scripts/windows_build/build.bat
index 9a2ed349e5b..0aeacfef7f9 100644
--- a/paddle/scripts/windows_build/build.bat
+++ b/paddle/scripts/windows_build/build.bat
@@ -82,8 +82,8 @@ echo Current directory : %cd%
 
 call:rest_env
 
-echo cmake %dst_path%\..\Paddle -G "Visual Studio 15 2017 Win64" -T host=x64 -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_AVX=ON -DPYTHON_INCLUDE_DIR=%PYTHON_DIR%\include\ -DPYTHON_LIBRARY=%PYTHON_DIR%\libs\ -DPYTHON_EXECUTABLE=%PYTHON_DIR%\python.exe -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=OFF -DWITH_PYTHON=ON -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_DIR% -DCUDA_ARCH_NAME=All -DNOAVX_CORE_FILE=%dst_path%_noavx\python\paddle\fluid\core_noavx.pyd
-cmake %dst_path%\..\Paddle -G "Visual Studio 15 2017 Win64" -T host=x64 -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_AVX=ON -DPYTHON_INCLUDE_DIR=%PYTHON_DIR%\include\ -DPYTHON_LIBRARY=%PYTHON_DIR%\libs\ -DPYTHON_EXECUTABLE=%PYTHON_DIR%\python.exe -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=OFF -DWITH_PYTHON=ON -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_DIR% -DCUDA_ARCH_NAME=All -DNOAVX_CORE_FILE=%dst_path%_noavx\python\paddle\fluid\core_noavx.pyd
+echo cmake %dst_path%\..\Paddle -G "Visual Studio 15 2017 Win64" -T host=x64 -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_AVX=ON -DPYTHON_INCLUDE_DIR=%PYTHON_DIR%\include\ -DPYTHON_LIBRARY=%PYTHON_DIR%\libs\ -DPYTHON_EXECUTABLE=%PYTHON_DIR%\python.exe -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=OFF -DWITH_PYTHON=ON -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_DIR% -DCUDA_ARCH_NAME=All
+cmake %dst_path%\..\Paddle -G "Visual Studio 15 2017 Win64" -T host=x64 -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_AVX=ON -DPYTHON_INCLUDE_DIR=%PYTHON_DIR%\include\ -DPYTHON_LIBRARY=%PYTHON_DIR%\libs\ -DPYTHON_EXECUTABLE=%PYTHON_DIR%\python.exe -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=OFF -DWITH_PYTHON=ON -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_DIR% -DCUDA_ARCH_NAME=All
 
 set  MSBUILDDISABLENODEREUSE=1
 
@@ -184,4 +184,4 @@ exit /b 1
 :END
 echo BUILD SUCCESSFULLY
 
-ENDLOCAL
\ No newline at end of file
+ENDLOCAL
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index e33af8b1bd5..b935fb78f4e 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -20,28 +20,7 @@ endif()
 
 set(SETUP_LOG_FILE "setup.py.log")
 
-set(FLUID_CORE_NAME "core")
-if(WITH_AVX AND AVX_FOUND)
-  set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_avx")
-  if(NOT DEFINED NOAVX_CORE_FILE OR NOAVX_CORE_FILE STREQUAL "")
-    message(
-      STATUS
-        "MESSAGE: This is just a message for publishing release.
-      You are building AVX version without NOAVX core.
-      So the wheel package may fail on NOAVX machine.
-      You can add -DNOAVX_CORE_FILE=/path/to/your/core_noavx.* in cmake command
-      to get a full wheel package to resolve this warning.
-      While, this version will still work on local machine.")
-  endif()
-
-  if(NOAVX_CORE_FILE AND NOT EXISTS "${NOAVX_CORE_FILE}")
-    message(FATAL_ERROR "The file ${NOAVX_CORE_FILE} does not exist!")
-  endif()
-
-  set(HAS_NOAVX_CORE ON)
-else()
-  set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_noavx")
-endif()
+set(FLUID_CORE_NAME "libpaddle")
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
                ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
@@ -55,48 +34,20 @@ if(WIN32)
 
   add_custom_command(
     OUTPUT ${FLUID_CORE}
-    COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
-    COMMAND cmake -E copy $<TARGET_LINKER_FILE:paddle_pybind> ${FLUID_CORE_LIB}
-    DEPENDS paddle_pybind)
-
-  set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.pyd)
+    COMMAND cmake -E copy $<TARGET_FILE:libpaddle> ${FLUID_CORE}
+    COMMAND cmake -E copy $<TARGET_LINKER_FILE:libpaddle> ${FLUID_CORE_LIB}
+    DEPENDS libpaddle)
 else()
   set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.so)
   add_custom_command(
     OUTPUT ${FLUID_CORE}
-    COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
-    DEPENDS paddle_pybind)
-
-  set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.so)
+    COMMAND cmake -E copy $<TARGET_FILE:libpaddle> ${FLUID_CORE}
+    DEPENDS libpaddle)
 endif()
 
 set(FLUID_CORE_DEPS ${FLUID_CORE})
 
-if(HAS_NOAVX_CORE AND EXISTS "${NOAVX_CORE_FILE}")
-  get_filename_component(NOAVX_CORE_NAME ${NOAVX_CORE_FILE} NAME)
-  get_filename_component(NOAVX_CORE_EXT ${NOAVX_CORE_FILE} EXT)
-  if(WIN32)
-    if(NOT NOAVX_CORE_EXT STREQUAL ".pyd")
-      message(
-        FATAL_ERROR
-          "Wrong file ${NOAVX_CORE_NAME}, the ext does not match windows *.pyd!"
-      )
-    endif()
-  else()
-    if(NOT NOAVX_CORE_EXT STREQUAL ".so")
-      message(
-        FATAL_ERROR
-          "Wrong file ${NOAVX_CORE_NAME}, the ext does not match *.so!")
-    endif()
-  endif()
-  add_custom_command(
-    OUTPUT ${FLUID_NOAVX_CORE}
-    COMMAND cmake -E copy ${NOAVX_CORE_FILE} ${FLUID_NOAVX_CORE}
-    DEPENDS paddle_pybind)
-  list(APPEND FLUID_CORE_DEPS ${FLUID_NOAVX_CORE})
-endif()
-
-add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE_DEPS})
+add_custom_target(copy_libpaddle ALL DEPENDS ${FLUID_CORE_DEPS})
 
 if(WIN32)
   add_custom_command(
@@ -107,8 +58,8 @@ if(WIN32)
     COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py
             bdist_wheel
     COMMENT "Packing whl packages------>>>"
-    DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto
-            profiler_py_proto pass_desc_py_proto ${PY_FILES})
+    DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto
+            pass_desc_py_proto ${PY_FILES})
 else()
   add_custom_command(
     OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
@@ -116,8 +67,8 @@ else()
     COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
     COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
     COMMENT "Packing whl packages------>>>"
-    DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto
-            profiler_py_proto pass_desc_py_proto ${PY_FILES})
+    DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto
+            pass_desc_py_proto ${PY_FILES})
 endif()
 
 add_custom_target(paddle_python ALL
diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py
index 1fa3c769d77..09a659ff573 100644
--- a/python/paddle/fluid/core.py
+++ b/python/paddle/fluid/core.py
@@ -24,15 +24,10 @@ core_suffix = 'so'
 if os.name == 'nt':
     core_suffix = 'pyd'
 
-has_avx_core = False
-has_noavx_core = False
-
+has_libpaddle_so = False
 current_path = os.path.abspath(os.path.dirname(__file__))
-if os.path.exists(current_path + os.sep + 'core_avx.' + core_suffix):
-    has_avx_core = True
-
-if os.path.exists(current_path + os.sep + 'core_noavx.' + core_suffix):
-    has_noavx_core = True
+if os.path.exists(current_path + os.sep + 'libpaddle.' + core_suffix):
+    has_libpaddle_so = True
 
 try:
     if os.name == 'nt':
@@ -198,10 +193,8 @@ def load_dso(dso_absolute_path):
 
 
 def pre_load(dso_name):
-    if has_avx_core:
-        core_so = current_path + os.sep + 'core_avx.' + core_suffix
-    elif has_noavx_core:
-        core_so = current_path + os.sep + 'core_noavx.' + core_suffix
+    if has_libpaddle_so:
+        core_so = current_path + os.sep + 'libpaddle.' + core_suffix
     else:
         core_so = None
     dso_path = get_dso_path(core_so, dso_name)
@@ -239,7 +232,7 @@ def less_than_ver(a, b):
 # (1) the number of dynamic shared librarys (DSO) loaded > 14,
 # (2) after that, load a dynamic shared library (DSO) with static TLS.
 # For paddle, the problem is that 'libgomp' is a DSO with static TLS, and it is loaded after 14 DSOs.
-# So, here is a tricky way to solve the problem by pre load 'libgomp' before 'core_avx.so'.
+# So, here is a tricky way to solve the problem: preload 'libgomp' before 'libpaddle.so'.
 # The final solution is to upgrade glibc to > 2.22 on the target system.
 if platform.system().lower() == 'linux':
     libc_type, libc_ver = get_libc_ver()
@@ -247,123 +240,65 @@ if platform.system().lower() == 'linux':
         try:
             pre_load('libgomp')
         except Exception as e:
-            # NOTE(zhiqiu): do not abort if failed, since it may success when import core_avx.so
+            # NOTE(zhiqiu): do not abort on failure, since importing libpaddle.so may still succeed
             sys.stderr.write('Error: Can not preload libgomp.so')
 
-load_noavx = False
-
-if avx_supported():
-    try:
-        from . import core_avx
-        core_avx.LoDTensor = core_avx.Tensor
-
-        from .core_avx import *
-        from .core_avx import __doc__, __file__, __name__, __package__
-        from .core_avx import __unittest_throw_exception__
-        from .core_avx import _append_python_callable_object_and_return_id
-        from .core_avx import _cleanup, _Scope
-        from .core_avx import _get_use_default_grad_op_desc_maker_ops
-        from .core_avx import _get_all_register_op_kernels
-        from .core_avx import _is_program_version_supported
-        from .core_avx import _set_eager_deletion_mode
-        from .core_avx import _get_eager_deletion_vars
-        from .core_avx import _set_fuse_parameter_group_size
-        from .core_avx import _set_fuse_parameter_memory_size
-        from .core_avx import _is_dygraph_debug_enabled
-        from .core_avx import _dygraph_debug_level
-        from .core_avx import _switch_tracer
-        from .core_avx import _set_paddle_lib_path
-        from .core_avx import _create_loaded_parameter
-        from .core_avx import _cuda_synchronize
-        from .core_avx import _is_compiled_with_heterps
-        from .core_avx import _promote_types_if_complex_exists
-        from .core_avx import _set_cached_executor_build_strategy
-        from .core_avx import _device_synchronize
-        from .core_avx import _get_current_stream
-        from .core_avx import _Profiler, _ProfilerResult, _RecordEvent
-        from .core_avx import _set_current_stream
-        if sys.platform != 'win32':
-            from .core_avx import _set_process_pids
-            from .core_avx import _erase_process_pids
-            from .core_avx import _set_process_signal_handler
-            from .core_avx import _throw_error_if_process_failed
-            from .core_avx import _convert_to_tensor_list
-            from .core_avx import _array_to_share_memory_tensor
-            from .core_avx import _cleanup_mmap_fds
-            from .core_avx import _remove_tensor_list_mmap_fds
-    except Exception as e:
-        if has_avx_core:
-            sys.stderr.write(
-                'Error: Can not import avx core while this file exists: ' +
-                current_path + os.sep + 'core_avx.' + core_suffix + '\n')
-            raise e
-        else:
-            from .. import compat as cpt
-            sys.stderr.write(
-                "Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. "
-                "Hence, no-avx core with worse preformance will be imported.\nIf you like, you could "
-                "reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' "
-                "to get better performance.\nThe original error is: %s\n" %
-                cpt.get_exception_message(e))
-            load_noavx = True
-else:
-    load_noavx = True
-
-if load_noavx:
-    try:
-        from . import core_noavx
-        core_noavx.LoDTensor = core_noavx.Tensor
-
-        from .core_noavx import *
-        from .core_noavx import __doc__, __file__, __name__, __package__
-        from .core_noavx import __unittest_throw_exception__
-        from .core_noavx import _append_python_callable_object_and_return_id
-        from .core_noavx import _cleanup, _Scope
-        from .core_noavx import _get_use_default_grad_op_desc_maker_ops
-        from .core_noavx import _get_all_register_op_kernels
-        from .core_noavx import _is_program_version_supported
-        from .core_noavx import _set_eager_deletion_mode
-        from .core_noavx import _get_eager_deletion_vars
-        from .core_noavx import _set_fuse_parameter_group_size
-        from .core_noavx import _set_fuse_parameter_memory_size
-        from .core_noavx import _is_dygraph_debug_enabled
-        from .core_noavx import _dygraph_debug_level
-        from .core_noavx import _switch_tracer
-        from .core_noavx import _set_paddle_lib_path
-        from .core_noavx import _create_loaded_parameter
-        from .core_noavx import _cuda_synchronize
-        from .core_noavx import _is_compiled_with_heterps
-        from .core_noavx import _promote_types_if_complex_exists
-        from .core_noavx import _set_cached_executor_build_strategy
-        from .core_noavx import _device_synchronize
-        from .core_noavx import _get_current_stream
-        from .core_noavx import _set_current_stream
-        from .core_noavx import _Profiler, _ProfilerResult, _RecordEvent
-        if sys.platform != 'win32':
-            from .core_noavx import _set_process_pids
-            from .core_noavx import _erase_process_pids
-            from .core_noavx import _set_process_signal_handler
-            from .core_noavx import _throw_error_if_process_failed
-            from .core_noavx import _convert_to_tensor_list
-            from .core_noavx import _array_to_share_memory_tensor
-            from .core_noavx import _cleanup_mmap_fds
-            from .core_noavx import _remove_tensor_list_mmap_fds
-    except Exception as e:
-        if has_noavx_core:
-            sys.stderr.write(
-                'Error: Can not import noavx core while this file exists: ' +
-                current_path + os.sep + 'core_noavx.' + core_suffix + '\n')
-        elif avx_supported():
-            sys.stderr.write(
-                "Error: The installed PaddlePaddle is incorrect. You should reinstall it by "
-                "'python -m pip install --force-reinstall paddlepaddle-gpu[==version]'\n"
-            )
-        else:
-            sys.stderr.write(
-                "Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, "
-                "you should reinstall paddlepaddle with no-avx core.\n")
-
-        raise e
+try:
+    from . import libpaddle
+    if avx_supported() and not libpaddle.is_compiled_with_avx():
+        sys.stderr.write(
+            "Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. "
+            "Hence, no-avx core with worse preformance will be imported.\nIf you like, you could "
+            "reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' "
+            "to get better performance.\n")
+
+    # assign tensor alias
+    libpaddle.LoDTensor = libpaddle.Tensor
+
+    from .libpaddle import *
+    from .libpaddle import __doc__, __file__, __name__, __package__
+    from .libpaddle import __unittest_throw_exception__
+    from .libpaddle import _append_python_callable_object_and_return_id
+    from .libpaddle import _cleanup, _Scope
+    from .libpaddle import _get_use_default_grad_op_desc_maker_ops
+    from .libpaddle import _get_all_register_op_kernels
+    from .libpaddle import _is_program_version_supported
+    from .libpaddle import _set_eager_deletion_mode
+    from .libpaddle import _get_eager_deletion_vars
+    from .libpaddle import _set_fuse_parameter_group_size
+    from .libpaddle import _set_fuse_parameter_memory_size
+    from .libpaddle import _is_dygraph_debug_enabled
+    from .libpaddle import _dygraph_debug_level
+    from .libpaddle import _switch_tracer
+    from .libpaddle import _set_paddle_lib_path
+    from .libpaddle import _create_loaded_parameter
+    from .libpaddle import _cuda_synchronize
+    from .libpaddle import _is_compiled_with_heterps
+    from .libpaddle import _promote_types_if_complex_exists
+    from .libpaddle import _set_cached_executor_build_strategy
+    from .libpaddle import _device_synchronize
+    from .libpaddle import _get_current_stream
+    from .libpaddle import _Profiler, _ProfilerResult, _RecordEvent
+    from .libpaddle import _set_current_stream
+    if sys.platform != 'win32':
+        from .libpaddle import _set_process_pids, _erase_process_pids
+        from .libpaddle import _set_process_signal_handler
+        from .libpaddle import _throw_error_if_process_failed
+        from .libpaddle import _convert_to_tensor_list
+        from .libpaddle import _array_to_share_memory_tensor
+        from .libpaddle import _cleanup_mmap_fds, _remove_tensor_list_mmap_fds
+except Exception as e:
+    if has_libpaddle_so:
+        sys.stderr.write(
+            'Error: Can not import paddle core while this file exists: ' +
+            current_path + os.sep + 'libpaddle.' + core_suffix + '\n')
+    # `libpaddle` is unbound when its own import raised; probing it then
+    # would raise NameError here and mask the original error `e`.
+    if ('libpaddle' in globals() and not avx_supported()
+            and libpaddle.is_compiled_with_avx()):
+        sys.stderr.write(
+            "Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, "
+            "you should reinstall paddlepaddle with no-avx core.\n")
+    raise e
 
 
 def set_paddle_custom_device_lib_path(lib_path):
diff --git a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
index e162daf2b87..39d47d6f448 100644
--- a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
+++ b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
@@ -61,9 +61,7 @@ paddle_custom_kernel_library_dir = [
 ]
 
 # libs
-libs = [':core_avx.so']
-if not core.has_avx_core and core.has_noavx_core:
-    libs = [':core_noavx.so']
+libs = [':libpaddle.so']
 
 custom_kernel_dot_module = Extension(
     'custom_kernel_dot',
diff --git a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py
index efe5368cdca..ba116526d3f 100644
--- a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py
+++ b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py
@@ -62,9 +62,7 @@ paddle_custom_kernel_library_dir = list(
     map(lambda path: os.path.join(path, 'paddle', 'fluid'), site_packages_path))
 
 # libs
-libs = [':core_avx.so']
-if not core.has_avx_core and core.has_noavx_core:
-    libs = [':core_noavx.so']
+libs = [':libpaddle.so']
 
 custom_kernel_dot_module = Extension(
     'custom_kernel_dot',
diff --git a/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt b/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
index 099b1ddc1c0..820e2b357aa 100644
--- a/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
+++ b/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
@@ -1,6 +1,6 @@
 if(WITH_CUSTOM_DEVICE AND NOT WITH_GPU)
   set(PLUGIN_URL https://github.com/PaddlePaddle/PaddleCustomDevice.git)
-  set(PLUGIN_TAG d5e5ac1d8e9f7588d4c2998bb3b5ffc66f65af2e)
+  set(PLUGIN_TAG b9ae8452f31525d0524810461b17856838acd821)
 
   file(
     GLOB TEST_OPS
diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py
index 0367e9ed3e3..7943f5f0e96 100644
--- a/python/paddle/utils/cpp_extension/cpp_extension.py
+++ b/python/paddle/utils/cpp_extension/cpp_extension.py
@@ -440,7 +440,7 @@ class BuildExtension(build_ext, object):
 
                 # NOTE(Aurelius84): Since Paddle 2.0, we require gcc version > 5.x,
                 # so we add this flag to ensure the symbol names from user compiled
-                # shared library have same ABI suffix with core_(no)avx.so.
+                # shared library have same ABI suffix with libpaddle.so.
                 # See https://stackoverflow.com/questions/34571583/understanding-gcc-5s-glibcxx-use-cxx11-abi-or-the-new-abi
                 add_compile_flag(cflags, ['-D_GLIBCXX_USE_CXX11_ABI=1'])
                 # Append this macor only when jointly compiling .cc with .cu
diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py
index 62fce336004..693a47f3f86 100644
--- a/python/paddle/utils/cpp_extension/extension_utils.py
+++ b/python/paddle/utils/cpp_extension/extension_utils.py
@@ -399,10 +399,7 @@ def _get_core_name():
     """
     import paddle
     ext_name = '.pyd' if IS_WINDOWS else '.so'
-    if not paddle.fluid.core.load_noavx:
-        return 'core_avx' + ext_name
-    else:
-        return 'core_noavx' + ext_name
+    return 'libpaddle' + ext_name
 
 
 def _get_lib_core_path():
@@ -419,13 +416,13 @@ def _get_dll_core_path():
     Return real path of libcore_(no)avx.dylib on Windows.
     """
     raw_core_name = _get_core_name()
-    dll_core_name = "paddle_pybind.dll"
+    dll_core_name = "libpaddle.dll"
     return os.path.join(_get_fluid_path(), dll_core_name)
 
 
 def _reset_so_rpath(so_path):
     """
-    NOTE(Aurelius84): Runtime path of core_(no)avx.so is modified into `@loader_path/../libs`
+    NOTE(Aurelius84): Runtime path of libpaddle.so is modified into `@loader_path/../libs`
     in setup.py.in. While loading custom op, `@loader_path` is the dirname of custom op
     instead of `paddle/fluid`. So we modify `@loader_path` from custom dylib into `@rpath`
     to ensure dynamic loader find it correctly.
@@ -524,7 +521,7 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
             # See _reset_so_rpath for details.
             extra_link_args.append('-Wl,-rpath,{}'.format(_get_fluid_path()))
             # On MacOS, ld don't support `-l:xx`, so we create a
-            # libcore_avx.dylib symbol link.
+            # liblibpaddle.dylib symbol link.
             lib_core_name = create_sym_link_if_not_exist()
             extra_link_args.append('-l{}'.format(lib_core_name))
         ###########################   -- END --    ###########################
@@ -555,7 +552,7 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
 
 def create_sym_link_if_not_exist():
     """
-    Create soft symbol link of `core_avx.so` or `core_noavx.so`
+    Create soft symbol link of `libpaddle.so`
     """
     assert OS_NAME.startswith('darwin') or IS_WINDOWS
 
@@ -574,7 +571,7 @@ def create_sym_link_if_not_exist():
                     .format(raw_core_name, new_dll_core_path, core_path,
                             raw_core_name))
                 run_cmd('mklink /H {} {}'.format(new_dll_core_path, core_path))
-        # core_avx or core_noavx with lib suffix
+        # libpaddle with lib suffix
         assert os.path.exists(new_dll_core_path)
         return raw_core_name[:-4] + ".lib"
 
@@ -590,7 +587,7 @@ def create_sym_link_if_not_exist():
                     "Failed to create soft symbol link for {}.\n Please execute the following command manually: `ln -s {} {}`"
                     .format(raw_core_name, core_path, new_lib_core_path))
 
-        # core_avx or core_noavx without suffix
+        # libpaddle without suffix
         return raw_core_name[:-3]
 
 
@@ -779,7 +776,7 @@ def find_paddle_libraries(use_cuda=False):
             cuda_lib_dir = find_cuda_libraries()
             paddle_lib_dirs.extend(cuda_lib_dir)
 
-    # add `paddle/fluid` to search `core_avx.so` or `core_noavx.so`
+    # add `paddle/fluid` to search `libpaddle.so`
     paddle_lib_dirs.append(_get_fluid_path())
 
     return paddle_lib_dirs
diff --git a/python/setup.py.in b/python/setup.py.in
index 3d400881de3..7fd26082ad6 100755
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -450,8 +450,6 @@ else:
     package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + '.pyd', '${FLUID_CORE_NAME}' + '.lib']}
 
 package_data['paddle.fluid'] += ['${PADDLE_BINARY_DIR}/python/paddle/cost_model/static_op_benchmark.json']
-if '${HAS_NOAVX_CORE}' == 'ON':
-    package_data['paddle.fluid'] += ['core_noavx' + ('.so' if os.name != 'nt' else '.pyd')]
 
 package_dir={
     '': '${PADDLE_BINARY_DIR}/python',
@@ -493,7 +491,7 @@ else:
         package_data['paddle.libs'] += ['openblas' + ext_name]
     elif os.name == 'posix' and platform.machine() == 'aarch64' and '${OPENBLAS_LIB}'.endswith('so'):
         # copy the libopenblas.so on linux+aarch64
-        # special: core_noavx.so depends on 'libopenblas.so.0', not 'libopenblas.so'
+        # special: libpaddle.so without avx depends on 'libopenblas.so.0', not 'libopenblas.so'
         if os.path.exists('${OPENBLAS_LIB}' + '.0'):
             shutil.copy('${OPENBLAS_LIB}' + '.0', libs_path)
             package_data['paddle.libs'] += ['libopenblas.so.0']
@@ -588,8 +586,7 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
             commands = ["install_name_tool -id '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so']
             commands.append("install_name_tool -add_rpath '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so')
         else:
-            commands = ["patchelf --set-soname '${FLUID_CORE_NAME}.so' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so']
-            commands.append("patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so')
+            commands = ["patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so']
         # The sw_64 not suppot patchelf, so we just disable that.
         if platform.machine() != 'sw_64' and platform.machine() != 'mips64':
             for command in commands:
diff --git a/tools/print_signatures.py b/tools/print_signatures.py
index f751709a767..5a6ad44e45d 100644
--- a/tools/print_signatures.py
+++ b/tools/print_signatures.py
@@ -296,7 +296,7 @@ def parse_args():
                         dest='skipped',
                         type=str,
                         help='Skip Checking submodules',
-                        default='paddle.fluid.core_avx.eager.ops')
+                        default='paddle.fluid.libpaddle.eager.ops')
 
     if len(sys.argv) == 1:
         args = parser.parse_args(['paddle'])
-- 
GitLab