Unify core avx and core_noavx to libpaddle (#46095)

* unify core_avx and core_noavx * fix except error * revert mac compile logic * revert dylib to so * add core_noavx branch * remove core_noavx * replace paddle_core by lib paddle * polish var name * replace paddle_core by libpaddle * update custom device commit * polish code by comments

Unify core avx and core_noavx to libpaddle (#46095)
* unify core_avx and core_noavx * fix except error * revert mac compile logic * revert dylib to so * add core_noavx branch * remove core_noavx * replace paddle_core by lib paddle * polish var name * replace paddle_core by libpaddle * update custom device commit * polish code by comments
267d71a4 · Chen Weihang · GitHub · fa97e5ba · 267d71a4 · 267d71a4
13 changed files
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -574,24 +574,24 @@ if(WITH_PYTHON)
  endif()

  cc_library(
-    paddle_pybind SHARED
+    libpaddle SHARED
    SRCS ${PYBIND_SRCS}
    DEPS ${PYBIND_DEPS} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})

  if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
-    add_dependencies(paddle_pybind legacy_eager_codegen)
-    add_dependencies(paddle_pybind eager_legacy_op_function_generator_cmd)
+    add_dependencies(libpaddle legacy_eager_codegen)
+    add_dependencies(libpaddle eager_legacy_op_function_generator_cmd)
  endif()

  if(NOT APPLE AND NOT WIN32)
-    target_link_libraries(paddle_pybind rt)
+    target_link_libraries(libpaddle rt)
  endif()

  if(WITH_ROCM)
-    target_link_libraries(paddle_pybind ${ROCM_HIPRTC_LIB})
+    target_link_libraries(libpaddle ${ROCM_HIPRTC_LIB})
  endif()

  get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
-  target_link_libraries(paddle_pybind ${os_dependency_modules})
-  add_dependencies(paddle_pybind op_function_generator_cmd)
+  target_link_libraries(libpaddle ${os_dependency_modules})
+  add_dependencies(libpaddle op_function_generator_cmd)
 endif()
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -205,6 +205,14 @@ PyTypeObject *g_framework_scope_pytype = nullptr;
 PyTypeObject *g_framework_lodtensorarray_pytype = nullptr;
 PyTypeObject *g_custom_op_kernel_ctx_pytype = nullptr;

+bool IsCompiledWithAVX() {
+#ifndef PADDLE_WITH_AVX
+  return false;
+#else
+  return true;
+#endif
+}
+
 bool IsCompiledWithCUDA() {
 #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP)
  return false;
@@ -576,12 +584,7 @@ static int GetNCCLVersion() {
 }
 #endif

-#ifdef PADDLE_WITH_AVX
-PYBIND11_MODULE(core_avx, m) {
-#else
-PYBIND11_MODULE(core_noavx, m) {
-#endif
-
+PYBIND11_MODULE(libpaddle, m) {
  BindImperative(&m);
  BindEager(&m);
  BindEagerStringTensor(&m);
@@ -1706,6 +1709,7 @@ All parameter, weight, gradient are variables in Paddle.
  });
  m.def("init_default_kernel_signatures",
        []() { framework::InitDefaultKernelSignatureMap(); });
+  m.def("is_compiled_with_avx", IsCompiledWithAVX);
  m.def("is_compiled_with_cuda", IsCompiledWithCUDA);
  m.def("is_compiled_with_ascend", IsCompiledWithAscend);
  m.def("is_compiled_with_rocm", IsCompiledWithROCM);

--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -272,7 +272,6 @@ EOF
        -DWITH_DISTRIBUTE=${distibuted_flag} \
        -DWITH_MKL=${WITH_MKL:-ON} \
        -DWITH_AVX=${WITH_AVX:-OFF} \
-        -DNOAVX_CORE_FILE=${NOAVX_CORE_FILE:-""} \
        -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
        -DNEW_RELEASE_PYPI=${NEW_RELEASE_PYPI:-OFF} \
        -DNEW_RELEASE_ALL=${NEW_RELEASE_ALL:-OFF} \
@@ -546,23 +545,26 @@ EOF
 }


-function combine_avx_noavx_build() {
-    mkdir -p ${PADDLE_ROOT}/build.noavx
-    cd ${PADDLE_ROOT}/build.noavx
-    WITH_AVX=OFF
+function avx_build() {
+    mkdir -p ${PADDLE_ROOT}/build
+    cd ${PADDLE_ROOT}/build
+    WITH_AVX=ON
+
    cmake_base ${PYTHON_ABI:-""}
    build_base
+}

-    # build combined one
+
+function noavx_build() {
    mkdir -p ${PADDLE_ROOT}/build
    cd ${PADDLE_ROOT}/build
-    NOAVX_CORE_FILE=`find ${PADDLE_ROOT}/build.noavx/python/paddle/fluid/ -name "core_noavx.*"`
-    WITH_AVX=ON
+    WITH_AVX=OFF

    cmake_base ${PYTHON_ABI:-""}
    build_base
 }

+
 function mac_m1_arm_build() {
    mkdir -p ${PADDLE_ROOT}/build
    cd ${PADDLE_ROOT}/build
@@ -2887,12 +2889,12 @@ EOF
    local LIB_TYPE=$1
    case $LIB_TYPE in
      full)
-        # Build full Paddle Python module. Will timeout without caching 'copy_paddle_pybind' first
-        make -j `nproc` framework_py_proto copy_paddle_pybind paddle_python
+        # Build full Paddle Python module. Will timeout without caching 'copy_libpaddle' first
+        make -j `nproc` framework_py_proto copy_libpaddle paddle_python
        ;;
      pybind)
        # Build paddle pybind library. Takes 49 minutes to build. Might timeout
-        make -j `nproc` copy_paddle_pybind
+        make -j `nproc` copy_libpaddle
        ;;
      proto)
        # Even smaller library.
@@ -3485,16 +3487,25 @@ function main() {
        gen_dockerfile ${PYTHON_ABI:-""}
        assert_api_spec_approvals
        ;;
-      combine_avx_noavx)
-        combine_avx_noavx_build
+      avx_build)
+        avx_build
+        gen_dockerfile ${PYTHON_ABI:-""}
+        ;;
+      noavx_build)
+        noavx_build
        gen_dockerfile ${PYTHON_ABI:-""}
        ;;
      mac_m1_arm)
        mac_m1_arm_build
        gen_dockerfile ${PYTHON_ABI:-""}
        ;;
-      combine_avx_noavx_build_and_test)
-        combine_avx_noavx_build
+      avx_build_and_test)
+        avx_build
+        gen_dockerfile ${PYTHON_ABI:-""}
+        parallel_test_base
+        ;;
+      noavx_build_and_test)
+        noavx_build
        gen_dockerfile ${PYTHON_ABI:-""}
        parallel_test_base
        ;;

--- a/paddle/scripts/windows_build/build.bat
+++ b/paddle/scripts/windows_build/build.bat
@@ -82,8 +82,8 @@ echo Current directory : %cd%

 call:rest_env

-echo cmake %dst_path%\..\Paddle -G "Visual Studio 15 2017 Win64" -T host=x64 -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_AVX=ON -DPYTHON_INCLUDE_DIR=%PYTHON_DIR%\include\ -DPYTHON_LIBRARY=%PYTHON_DIR%\libs\ -DPYTHON_EXECUTABLE=%PYTHON_DIR%\python.exe -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=OFF -DWITH_PYTHON=ON -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_DIR% -DCUDA_ARCH_NAME=All -DNOAVX_CORE_FILE=%dst_path%_noavx\python\paddle\fluid\core_noavx.pyd
-cmake %dst_path%\..\Paddle -G "Visual Studio 15 2017 Win64" -T host=x64 -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_AVX=ON -DPYTHON_INCLUDE_DIR=%PYTHON_DIR%\include\ -DPYTHON_LIBRARY=%PYTHON_DIR%\libs\ -DPYTHON_EXECUTABLE=%PYTHON_DIR%\python.exe -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=OFF -DWITH_PYTHON=ON -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_DIR% -DCUDA_ARCH_NAME=All -DNOAVX_CORE_FILE=%dst_path%_noavx\python\paddle\fluid\core_noavx.pyd
+echo cmake %dst_path%\..\Paddle -G "Visual Studio 15 2017 Win64" -T host=x64 -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_AVX=ON -DPYTHON_INCLUDE_DIR=%PYTHON_DIR%\include\ -DPYTHON_LIBRARY=%PYTHON_DIR%\libs\ -DPYTHON_EXECUTABLE=%PYTHON_DIR%\python.exe -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=OFF -DWITH_PYTHON=ON -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_DIR% -DCUDA_ARCH_NAME=All
+cmake %dst_path%\..\Paddle -G "Visual Studio 15 2017 Win64" -T host=x64 -DWITH_GPU=%WITH_GPU% -DWITH_MKL=%WITH_MKL% -DWITH_AVX=ON -DPYTHON_INCLUDE_DIR=%PYTHON_DIR%\include\ -DPYTHON_LIBRARY=%PYTHON_DIR%\libs\ -DPYTHON_EXECUTABLE=%PYTHON_DIR%\python.exe -DCMAKE_BUILD_TYPE=Release -DWITH_TESTING=OFF -DWITH_PYTHON=ON -DCUDA_TOOLKIT_ROOT_DIR=%CUDA_DIR% -DCUDA_ARCH_NAME=All

 set  MSBUILDDISABLENODEREUSE=1

@@ -184,4 +184,4 @@ exit /b 1
 :END
 echo BUILD SUCCESSFULLY

-ENDLOCAL
\ No newline at end of file
+ENDLOCAL
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -20,28 +20,7 @@ endif()

 set(SETUP_LOG_FILE "setup.py.log")

-set(FLUID_CORE_NAME "core")
-if(WITH_AVX AND AVX_FOUND)
-  set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_avx")
-  if(NOT DEFINED NOAVX_CORE_FILE OR NOAVX_CORE_FILE STREQUAL "")
-    message(
-      STATUS
-        "MESSAGE: This is just a message for publishing release.
-      You are building AVX version without NOAVX core.
-      So the wheel package may fail on NOAVX machine.
-      You can add -DNOAVX_CORE_FILE=/path/to/your/core_noavx.* in cmake command
-      to get a full wheel package to resolve this warning.
-      While, this version will still work on local machine.")
-  endif()
-
-  if(NOAVX_CORE_FILE AND NOT EXISTS "${NOAVX_CORE_FILE}")
-    message(FATAL_ERROR "The file ${NOAVX_CORE_FILE} does not exist!")
-  endif()
-
-  set(HAS_NOAVX_CORE ON)
-else()
-  set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_noavx")
-endif()
+set(FLUID_CORE_NAME "libpaddle")

 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
               ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
@@ -55,48 +34,20 @@ if(WIN32)

  add_custom_command(
    OUTPUT ${FLUID_CORE}
-    COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
-    COMMAND cmake -E copy $<TARGET_LINKER_FILE:paddle_pybind> ${FLUID_CORE_LIB}
-    DEPENDS paddle_pybind)
-
-  set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.pyd)
+    COMMAND cmake -E copy $<TARGET_FILE:libpaddle> ${FLUID_CORE}
+    COMMAND cmake -E copy $<TARGET_LINKER_FILE:libpaddle> ${FLUID_CORE_LIB}
+    DEPENDS libpaddle)
 else()
  set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.so)
  add_custom_command(
    OUTPUT ${FLUID_CORE}
-    COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
-    DEPENDS paddle_pybind)
-
-  set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.so)
+    COMMAND cmake -E copy $<TARGET_FILE:libpaddle> ${FLUID_CORE}
+    DEPENDS libpaddle)
 endif()

 set(FLUID_CORE_DEPS ${FLUID_CORE})

-if(HAS_NOAVX_CORE AND EXISTS "${NOAVX_CORE_FILE}")
-  get_filename_component(NOAVX_CORE_NAME ${NOAVX_CORE_FILE} NAME)
-  get_filename_component(NOAVX_CORE_EXT ${NOAVX_CORE_FILE} EXT)
-  if(WIN32)
-    if(NOT NOAVX_CORE_EXT STREQUAL ".pyd")
-      message(
-        FATAL_ERROR
-          "Wrong file ${NOAVX_CORE_NAME}, the ext does not match windows *.pyd!"
-      )
-    endif()
-  else()
-    if(NOT NOAVX_CORE_EXT STREQUAL ".so")
-      message(
-        FATAL_ERROR
-          "Wrong file ${NOAVX_CORE_NAME}, the ext does not match *.so!")
-    endif()
-  endif()
-  add_custom_command(
-    OUTPUT ${FLUID_NOAVX_CORE}
-    COMMAND cmake -E copy ${NOAVX_CORE_FILE} ${FLUID_NOAVX_CORE}
-    DEPENDS paddle_pybind)
-  list(APPEND FLUID_CORE_DEPS ${FLUID_NOAVX_CORE})
-endif()
-
-add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE_DEPS})
+add_custom_target(copy_libpaddle ALL DEPENDS ${FLUID_CORE_DEPS})

 if(WIN32)
  add_custom_command(
@@ -107,8 +58,8 @@ if(WIN32)
    COMMAND ${CMAKE_COMMAND} -E env ${py_env} ${PYTHON_EXECUTABLE} setup.py
            bdist_wheel
    COMMENT "Packing whl packages------>>>"
-    DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto
-            profiler_py_proto pass_desc_py_proto ${PY_FILES})
+    DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto
+            pass_desc_py_proto ${PY_FILES})
 else()
  add_custom_command(
    OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
@@ -116,8 +67,8 @@ else()
    COMMAND cp -r ${PADDLE_SOURCE_DIR}/python/paddle ${PADDLE_BINARY_DIR}/python
    COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
    COMMENT "Packing whl packages------>>>"
-    DEPENDS copy_paddle_pybind ${FLUID_CORE} framework_py_proto
-            profiler_py_proto pass_desc_py_proto ${PY_FILES})
+    DEPENDS copy_libpaddle ${FLUID_CORE} framework_py_proto profiler_py_proto
+            pass_desc_py_proto ${PY_FILES})
 endif()

 add_custom_target(paddle_python ALL

--- a/python/paddle/fluid/core.py
+++ b/python/paddle/fluid/core.py
@@ -24,15 +24,10 @@ core_suffix = 'so'
 if os.name == 'nt':
    core_suffix = 'pyd'

-has_avx_core = False
-has_noavx_core = False
-
+has_libpaddle_so = False
 current_path = os.path.abspath(os.path.dirname(__file__))
-if os.path.exists(current_path + os.sep + 'core_avx.' + core_suffix):
-    has_avx_core = True
-
-if os.path.exists(current_path + os.sep + 'core_noavx.' + core_suffix):
-    has_noavx_core = True
+if os.path.exists(current_path + os.sep + 'libpaddle.' + core_suffix):
+    has_libpaddle_so = True

 try:
    if os.name == 'nt':
@@ -198,10 +193,8 @@ def load_dso(dso_absolute_path):


 def pre_load(dso_name):
-    if has_avx_core:
-        core_so = current_path + os.sep + 'core_avx.' + core_suffix
-    elif has_noavx_core:
-        core_so = current_path + os.sep + 'core_noavx.' + core_suffix
+    if has_libpaddle_so:
+        core_so = current_path + os.sep + 'libpaddle.' + core_suffix
    else:
        core_so = None
    dso_path = get_dso_path(core_so, dso_name)
@@ -239,7 +232,7 @@ def less_than_ver(a, b):
 # (1) the number of dynamic shared librarys (DSO) loaded > 14,
 # (2) after that, load a dynamic shared library (DSO) with static TLS.
 # For paddle, the problem is that 'libgomp' is a DSO with static TLS, and it is loaded after 14 DSOs.
-# So, here is a tricky way to solve the problem by pre load 'libgomp' before 'core_avx.so'.
+# So, here is a tricky way to solve the problem: preload 'libgomp' before 'libpaddle.so'.
 # The final solution is to upgrade glibc to > 2.22 on the target system.
 if platform.system().lower() == 'linux':
    libc_type, libc_ver = get_libc_ver()
@@ -247,123 +240,65 @@ if platform.system().lower() == 'linux':
        try:
            pre_load('libgomp')
        except Exception as e:
-            # NOTE(zhiqiu): do not abort if failed, since it may success when import core_avx.so
+            # NOTE(zhiqiu): do not abort if failed, since it may success when import libpaddle.so
            sys.stderr.write('Error: Can not preload libgomp.so')

-load_noavx = False
-
-if avx_supported():
-    try:
-        from . import core_avx
-        core_avx.LoDTensor = core_avx.Tensor
-
-        from .core_avx import *
-        from .core_avx import __doc__, __file__, __name__, __package__
-        from .core_avx import __unittest_throw_exception__
-        from .core_avx import _append_python_callable_object_and_return_id
-        from .core_avx import _cleanup, _Scope
-        from .core_avx import _get_use_default_grad_op_desc_maker_ops
-        from .core_avx import _get_all_register_op_kernels
-        from .core_avx import _is_program_version_supported
-        from .core_avx import _set_eager_deletion_mode
-        from .core_avx import _get_eager_deletion_vars
-        from .core_avx import _set_fuse_parameter_group_size
-        from .core_avx import _set_fuse_parameter_memory_size
-        from .core_avx import _is_dygraph_debug_enabled
-        from .core_avx import _dygraph_debug_level
-        from .core_avx import _switch_tracer
-        from .core_avx import _set_paddle_lib_path
-        from .core_avx import _create_loaded_parameter
-        from .core_avx import _cuda_synchronize
-        from .core_avx import _is_compiled_with_heterps
-        from .core_avx import _promote_types_if_complex_exists
-        from .core_avx import _set_cached_executor_build_strategy
-        from .core_avx import _device_synchronize
-        from .core_avx import _get_current_stream
-        from .core_avx import _Profiler, _ProfilerResult, _RecordEvent
-        from .core_avx import _set_current_stream
-        if sys.platform != 'win32':
-            from .core_avx import _set_process_pids
-            from .core_avx import _erase_process_pids
-            from .core_avx import _set_process_signal_handler
-            from .core_avx import _throw_error_if_process_failed
-            from .core_avx import _convert_to_tensor_list
-            from .core_avx import _array_to_share_memory_tensor
-            from .core_avx import _cleanup_mmap_fds
-            from .core_avx import _remove_tensor_list_mmap_fds
-    except Exception as e:
-        if has_avx_core:
-            sys.stderr.write(
-                'Error: Can not import avx core while this file exists: ' +
-                current_path + os.sep + 'core_avx.' + core_suffix + '\n')
-            raise e
-        else:
-            from .. import compat as cpt
-            sys.stderr.write(
-                "Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. "
-                "Hence, no-avx core with worse preformance will be imported.\nIf you like, you could "
-                "reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' "
-                "to get better performance.\nThe original error is: %s\n" %
-                cpt.get_exception_message(e))
-            load_noavx = True
-else:
-    load_noavx = True
-
-if load_noavx:
-    try:
-        from . import core_noavx
-        core_noavx.LoDTensor = core_noavx.Tensor
-
-        from .core_noavx import *
-        from .core_noavx import __doc__, __file__, __name__, __package__
-        from .core_noavx import __unittest_throw_exception__
-        from .core_noavx import _append_python_callable_object_and_return_id
-        from .core_noavx import _cleanup, _Scope
-        from .core_noavx import _get_use_default_grad_op_desc_maker_ops
-        from .core_noavx import _get_all_register_op_kernels
-        from .core_noavx import _is_program_version_supported
-        from .core_noavx import _set_eager_deletion_mode
-        from .core_noavx import _get_eager_deletion_vars
-        from .core_noavx import _set_fuse_parameter_group_size
-        from .core_noavx import _set_fuse_parameter_memory_size
-        from .core_noavx import _is_dygraph_debug_enabled
-        from .core_noavx import _dygraph_debug_level
-        from .core_noavx import _switch_tracer
-        from .core_noavx import _set_paddle_lib_path
-        from .core_noavx import _create_loaded_parameter
-        from .core_noavx import _cuda_synchronize
-        from .core_noavx import _is_compiled_with_heterps
-        from .core_noavx import _promote_types_if_complex_exists
-        from .core_noavx import _set_cached_executor_build_strategy
-        from .core_noavx import _device_synchronize
-        from .core_noavx import _get_current_stream
-        from .core_noavx import _set_current_stream
-        from .core_noavx import _Profiler, _ProfilerResult, _RecordEvent
-        if sys.platform != 'win32':
-            from .core_noavx import _set_process_pids
-            from .core_noavx import _erase_process_pids
-            from .core_noavx import _set_process_signal_handler
-            from .core_noavx import _throw_error_if_process_failed
-            from .core_noavx import _convert_to_tensor_list
-            from .core_noavx import _array_to_share_memory_tensor
-            from .core_noavx import _cleanup_mmap_fds
-            from .core_noavx import _remove_tensor_list_mmap_fds
-    except Exception as e:
-        if has_noavx_core:
-            sys.stderr.write(
-                'Error: Can not import noavx core while this file exists: ' +
-                current_path + os.sep + 'core_noavx.' + core_suffix + '\n')
-        elif avx_supported():
-            sys.stderr.write(
-                "Error: The installed PaddlePaddle is incorrect. You should reinstall it by "
-                "'python -m pip install --force-reinstall paddlepaddle-gpu[==version]'\n"
-            )
-        else:
-            sys.stderr.write(
-                "Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, "
-                "you should reinstall paddlepaddle with no-avx core.\n")
-
-        raise e
+try:
+    # Load the unified extension module and pull its public and private names into this module.
+    from . import libpaddle
+    if avx_supported() and not libpaddle.is_compiled_with_avx():
+        sys.stderr.write(
+            "Hint: Your machine support AVX, but the installed paddlepaddle doesn't have avx core. "
+            "Hence, no-avx core with worse preformance will be imported.\nIf you like, you could "
+            "reinstall paddlepaddle by 'python -m pip install --force-reinstall paddlepaddle-gpu[==version]' "
+            "to get better performance.\n")
+
+    # Expose `Tensor` under the additional name `LoDTensor`.
+    libpaddle.LoDTensor = libpaddle.Tensor
+
+    from .libpaddle import *
+    from .libpaddle import __doc__, __file__, __name__, __package__
+    from .libpaddle import __unittest_throw_exception__
+    from .libpaddle import _append_python_callable_object_and_return_id
+    from .libpaddle import _cleanup, _Scope
+    from .libpaddle import _get_use_default_grad_op_desc_maker_ops
+    from .libpaddle import _get_all_register_op_kernels
+    from .libpaddle import _is_program_version_supported
+    from .libpaddle import _set_eager_deletion_mode
+    from .libpaddle import _get_eager_deletion_vars
+    from .libpaddle import _set_fuse_parameter_group_size
+    from .libpaddle import _set_fuse_parameter_memory_size
+    from .libpaddle import _is_dygraph_debug_enabled, _dygraph_debug_level
+    from .libpaddle import _switch_tracer
+    from .libpaddle import _set_paddle_lib_path
+    from .libpaddle import _create_loaded_parameter
+    from .libpaddle import _cuda_synchronize
+    from .libpaddle import _is_compiled_with_heterps
+    from .libpaddle import _promote_types_if_complex_exists
+    from .libpaddle import _set_cached_executor_build_strategy
+    from .libpaddle import _device_synchronize
+    from .libpaddle import _get_current_stream, _set_current_stream
+    from .libpaddle import _Profiler, _ProfilerResult, _RecordEvent
+    if sys.platform != 'win32':
+        from .libpaddle import _set_process_pids
+        from .libpaddle import _erase_process_pids
+        from .libpaddle import _set_process_signal_handler
+        from .libpaddle import _throw_error_if_process_failed
+        from .libpaddle import _convert_to_tensor_list
+        from .libpaddle import _array_to_share_memory_tensor
+        from .libpaddle import _cleanup_mmap_fds
+        from .libpaddle import _remove_tensor_list_mmap_fds
+except Exception as e:
+    if has_libpaddle_so:
+        sys.stderr.write(
+            'Error: Can not import paddle core while this file exists: ' +
+            current_path + os.sep + 'libpaddle.' + core_suffix + '\n')
+    # `libpaddle` is unbound when the import itself failed; check first so a NameError cannot mask `e`.
+    if not avx_supported() and 'libpaddle' in globals() and libpaddle.is_compiled_with_avx():
+        sys.stderr.write(
+            "Error: Your machine doesn't support AVX, but the installed PaddlePaddle is avx core, "
+            "you should reinstall paddlepaddle with no-avx core.\n")
+    raise e


 def set_paddle_custom_device_lib_path(lib_path):

--- a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
+++ b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_c_setup.py
@@ -61,9 +61,7 @@ paddle_custom_kernel_library_dir = [
 ]

 # libs
-libs = [':core_avx.so']
-if not core.has_avx_core and core.has_noavx_core:
-    libs = [':core_noavx.so']
+libs = [':libpaddle.so']

 custom_kernel_dot_module = Extension(
    'custom_kernel_dot',

--- a/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py
+++ b/python/paddle/fluid/tests/custom_kernel/custom_kernel_dot_setup.py
@@ -62,9 +62,7 @@ paddle_custom_kernel_library_dir = list(
    map(lambda path: os.path.join(path, 'paddle', 'fluid'), site_packages_path))

 # libs
-libs = [':core_avx.so']
-if not core.has_avx_core and core.has_noavx_core:
-    libs = [':core_noavx.so']
+libs = [':libpaddle.so']

 custom_kernel_dot_module = Extension(
    'custom_kernel_dot',

--- a/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
+++ b/python/paddle/fluid/tests/custom_runtime/CMakeLists.txt
 if(WITH_CUSTOM_DEVICE AND NOT WITH_GPU)
  set(PLUGIN_URL https://github.com/PaddlePaddle/PaddleCustomDevice.git)
-  set(PLUGIN_TAG d5e5ac1d8e9f7588d4c2998bb3b5ffc66f65af2e)
+  set(PLUGIN_TAG b9ae8452f31525d0524810461b17856838acd821)

  file(
    GLOB TEST_OPS

--- a/python/paddle/utils/cpp_extension/cpp_extension.py
+++ b/python/paddle/utils/cpp_extension/cpp_extension.py
@@ -440,7 +440,7 @@ class BuildExtension(build_ext, object):

                # NOTE(Aurelius84): Since Paddle 2.0, we require gcc version > 5.x,
                # so we add this flag to ensure the symbol names from user compiled
-                # shared library have same ABI suffix with core_(no)avx.so.
+                # shared library have same ABI suffix with libpaddle.so.
                # See https://stackoverflow.com/questions/34571583/understanding-gcc-5s-glibcxx-use-cxx11-abi-or-the-new-abi
                add_compile_flag(cflags, ['-D_GLIBCXX_USE_CXX11_ABI=1'])
                # Append this macor only when jointly compiling .cc with .cu

--- a/python/paddle/utils/cpp_extension/extension_utils.py
+++ b/python/paddle/utils/cpp_extension/extension_utils.py
@@ -399,10 +399,7 @@ def _get_core_name():
    """
    import paddle
    ext_name = '.pyd' if IS_WINDOWS else '.so'
-    if not paddle.fluid.core.load_noavx:
-        return 'core_avx' + ext_name
-    else:
-        return 'core_noavx' + ext_name
+    return 'libpaddle' + ext_name


 def _get_lib_core_path():
@@ -419,13 +416,13 @@ def _get_dll_core_path():
    Return real path of libcore_(no)avx.dylib on Windows.
    """
    raw_core_name = _get_core_name()
-    dll_core_name = "paddle_pybind.dll"
+    dll_core_name = "libpaddle.dll"
    return os.path.join(_get_fluid_path(), dll_core_name)


 def _reset_so_rpath(so_path):
    """
-    NOTE(Aurelius84): Runtime path of core_(no)avx.so is modified into `@loader_path/../libs`
+    NOTE(Aurelius84): Runtime path of libpaddle.so is modified into `@loader_path/../libs`
    in setup.py.in. While loading custom op, `@loader_path` is the dirname of custom op
    instead of `paddle/fluid`. So we modify `@loader_path` from custom dylib into `@rpath`
    to ensure dynamic loader find it correctly.
@@ -524,7 +521,7 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
            # See _reset_so_rpath for details.
            extra_link_args.append('-Wl,-rpath,{}'.format(_get_fluid_path()))
            # On MacOS, ld don't support `-l:xx`, so we create a
-            # libcore_avx.dylib symbol link.
+            # liblibpaddle.dylib symbol link.
            lib_core_name = create_sym_link_if_not_exist()
            extra_link_args.append('-l{}'.format(lib_core_name))
        ###########################   -- END --    ###########################
@@ -555,7 +552,7 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):

 def create_sym_link_if_not_exist():
    """
-    Create soft symbol link of `core_avx.so` or `core_noavx.so`
+    Create soft symbol link of `libpaddle.so`
    """
    assert OS_NAME.startswith('darwin') or IS_WINDOWS

@@ -574,7 +571,7 @@ def create_sym_link_if_not_exist():
                    .format(raw_core_name, new_dll_core_path, core_path,
                            raw_core_name))
                run_cmd('mklink /H {} {}'.format(new_dll_core_path, core_path))
-        # core_avx or core_noavx with lib suffix
+        # libpaddle with lib suffix
        assert os.path.exists(new_dll_core_path)
        return raw_core_name[:-4] + ".lib"

@@ -590,7 +587,7 @@ def create_sym_link_if_not_exist():
                    "Failed to create soft symbol link for {}.\n Please execute the following command manually: `ln -s {} {}`"
                    .format(raw_core_name, core_path, new_lib_core_path))

-        # core_avx or core_noavx without suffix
+        # libpaddle without suffix
        return raw_core_name[:-3]


@@ -779,7 +776,7 @@ def find_paddle_libraries(use_cuda=False):
            cuda_lib_dir = find_cuda_libraries()
            paddle_lib_dirs.extend(cuda_lib_dir)

-    # add `paddle/fluid` to search `core_avx.so` or `core_noavx.so`
+    # add `paddle/fluid` to search `libpaddle.so`
    paddle_lib_dirs.append(_get_fluid_path())

    return paddle_lib_dirs

--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -452,8 +452,6 @@ else:
    package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + '.pyd', '${FLUID_CORE_NAME}' + '.lib']}

 package_data['paddle.fluid'] += ['${PADDLE_BINARY_DIR}/python/paddle/cost_model/static_op_benchmark.json']
-if '${HAS_NOAVX_CORE}' == 'ON':
-    package_data['paddle.fluid'] += ['core_noavx' + ('.so' if os.name != 'nt' else '.pyd')]

 package_dir={
    '': '${PADDLE_BINARY_DIR}/python',
@@ -495,7 +493,7 @@ else:
        package_data['paddle.libs'] += ['openblas' + ext_name]
    elif os.name == 'posix' and platform.machine() == 'aarch64' and '${OPENBLAS_LIB}'.endswith('so'):
        # copy the libopenblas.so on linux+aarch64
-        # special: core_noavx.so depends on 'libopenblas.so.0', not 'libopenblas.so'
+        # special: libpaddle.so without avx depends on 'libopenblas.so.0', not 'libopenblas.so'
        if os.path.exists('${OPENBLAS_LIB}' + '.0'):
            shutil.copy('${OPENBLAS_LIB}' + '.0', libs_path)
            package_data['paddle.libs'] += ['libopenblas.so.0']
@@ -590,8 +588,7 @@ if '${CMAKE_BUILD_TYPE}' == 'Release':
            commands = ["install_name_tool -id '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so']
            commands.append("install_name_tool -add_rpath '@loader_path/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so')
        else:
-            commands = ["patchelf --set-soname '${FLUID_CORE_NAME}.so' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so']
-            commands.append("patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so')
+            commands = ["patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + '.so']
        # The sw_64 not suppot patchelf, so we just disable that.
        if platform.machine() != 'sw_64' and platform.machine() != 'mips64':
            for command in commands:

--- a/tools/print_signatures.py
+++ b/tools/print_signatures.py
@@ -296,7 +296,7 @@ def parse_args():
                        dest='skipped',
                        type=str,
                        help='Skip Checking submodules',
-                        default='paddle.fluid.core_avx.eager.ops')
+                        default='paddle.fluid.libpaddle.eager.ops')

    if len(sys.argv) == 1:
        args = parser.parse_args(['paddle'])