cherry pick from combine noavx and avx package (#17889) (#18036)

test=release/1.5 * support avx and noavx core * add catch and give some log * fix build * add missing package * fix pybind name * fix import error * conbime noavx core * add requirements * fix unkown message * fix api spec * refine and clean * update * follow comments * refine scripts

cherry pick from combine noavx and avx package (#17889) (#18036)
test=release/1.5 * support avx and noavx core * add catch and give some log * fix build * add missing package * fix pybind name * fix import error * conbime noavx core * add requirements * fix unkown message * fix api spec * refine and clean * update * follow comments * refine scripts
7e31d5a2 · tensor-tang · GitHub · e4159cd6 · 7e31d5a2 · 7e31d5a2
10 changed file
--- a/Dockerfile
+++ b/Dockerfile
@@ -92,17 +92,17 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
 # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
 # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
 # version(1.7.1 for now), which causes building documentation failed.
-RUN pip3 --no-cache-dir install -U wheel && \
+RUN pip3 --no-cache-dir install -U wheel x86cpu==0.4 && \
    pip3 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
    pip3 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
-    pip3.6 --no-cache-dir install -U wheel && \
+    pip3.6 --no-cache-dir install -U wheel x86cpu==0.4 && \
    pip3.6 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
    pip3.6 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
-    pip3.7 --no-cache-dir install -U wheel && \
+    pip3.7 --no-cache-dir install -U wheel x86cpu==0.4 && \
    pip3.7 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
    pip3.7 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
    easy_install -U pip && \
-    pip --no-cache-dir install -U pip setuptools wheel && \
+    pip --no-cache-dir install -U pip setuptools wheel x86cpu==0.4 && \
    pip --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
    pip --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark


--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -30,6 +30,7 @@ endif(NOT WITH_PROFILER)

 if(WITH_AVX AND AVX_FOUND)
    set(SIMD_FLAG ${AVX_FLAG})
+    add_definitions(-DPADDLE_WITH_AVX)
 elseif(SSE3_FOUND)
    set(SIMD_FLAG ${SSE3_FLAG})
 endif()

--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -43,10 +43,10 @@ paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name']
 paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from', 'places'], varargs=None, keywords=None, defaults=(None, None, None, None, None)), ('document', '0e17773521634ef798fddd7d2ea3ef96'))
 paddle.fluid.CompiledProgram.with_inference_optimize (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=None), ('document', '9e5b009d850191a010e859189c127fd8'))
-paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.ExecutionStrategy) -> None
-paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
-paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None
-paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core.ParallelExecutor.BuildStrategy) -> None
+paddle.fluid.ExecutionStrategy.__init__ __init__(self: paddle.fluid.core_avx.ParallelExecutor.ExecutionStrategy) -> None
+paddle.fluid.BuildStrategy.GradientScaleStrategy.__init__ __init__(self: paddle.fluid.core_avx.ParallelExecutor.BuildStrategy.GradientScaleStrategy, arg0: int) -> None
+paddle.fluid.BuildStrategy.ReduceStrategy.__init__ __init__(self: paddle.fluid.core_avx.ParallelExecutor.BuildStrategy.ReduceStrategy, arg0: int) -> None
+paddle.fluid.BuildStrategy.__init__ __init__(self: paddle.fluid.core_avx.ParallelExecutor.BuildStrategy) -> None
 paddle.fluid.io.save_vars (ArgSpec(args=['executor', 'dirname', 'main_program', 'vars', 'predicate', 'filename'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '869104f47e6fd21d897c3fcc426aa942'))
 paddle.fluid.io.save_params (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '07ffd5351b30cf47172ccfd61bd0de6f'))
 paddle.fluid.io.save_persistables (ArgSpec(args=['executor', 'dirname', 'main_program', 'filename'], varargs=None, keywords=None, defaults=(None, None)), ('document', '9141bb5f32caf7975eb3fd88c8a1b2da'))
@@ -545,18 +545,18 @@ paddle.fluid.optimizer.ExponentialMovingAverage.update (ArgSpec(args=['self'], v
 paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '08a5dd9f6f376ff3d55e0b1d92115cbd'))
 paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core.LoDTensor, arg0: List[List[int]]) -> None  2. __init__(self: paddle.fluid.core.LoDTensor) -> None
-paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> bool
-paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
-paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor) -> List[List[int]]
-paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core.LoDTensor, lod: List[List[int]]) -> None
-paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core.LoDTensor, recursive_sequence_lengths: List[List[int]]) -> None
-paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core.Tensor) -> List[int]
-paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core.LoDTensorArray) -> None
-paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core.LoDTensorArray, tensor: paddle.fluid.core.LoDTensor) -> None
-paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core.CPUPlace) -> None
-paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core.CUDAPlace, arg0: int) -> None
-paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core.CUDAPinnedPlace) -> None
+paddle.fluid.LoDTensor.__init__ 1. __init__(self: paddle.fluid.core_avx.LoDTensor, arg0: List[List[int]]) -> None  2. __init__(self: paddle.fluid.core_avx.LoDTensor) -> None
+paddle.fluid.LoDTensor.has_valid_recursive_sequence_lengths has_valid_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) -> bool
+paddle.fluid.LoDTensor.lod lod(self: paddle.fluid.core_avx.LoDTensor) -> List[List[int]]
+paddle.fluid.LoDTensor.recursive_sequence_lengths recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) -> List[List[int]]
+paddle.fluid.LoDTensor.set_lod set_lod(self: paddle.fluid.core_avx.LoDTensor, lod: List[List[int]]) -> None
+paddle.fluid.LoDTensor.set_recursive_sequence_lengths set_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor, recursive_sequence_lengths: List[List[int]]) -> None
+paddle.fluid.LoDTensor.shape shape(self: paddle.fluid.core_avx.Tensor) -> List[int]
+paddle.fluid.LoDTensorArray.__init__ __init__(self: paddle.fluid.core_avx.LoDTensorArray) -> None
+paddle.fluid.LoDTensorArray.append append(self: paddle.fluid.core_avx.LoDTensorArray, tensor: paddle.fluid.core_avx.LoDTensor) -> None
+paddle.fluid.CPUPlace.__init__ __init__(self: paddle.fluid.core_avx.CPUPlace) -> None
+paddle.fluid.CUDAPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPlace, arg0: int) -> None
+paddle.fluid.CUDAPinnedPlace.__init__ __init__(self: paddle.fluid.core_avx.CUDAPinnedPlace) -> None
 paddle.fluid.ParamAttr.__init__ (ArgSpec(args=['self', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.WeightNormParamAttr.__init__ (ArgSpec(args=['self', 'dim', 'name', 'initializer', 'learning_rate', 'regularizer', 'trainable', 'gradient_clip', 'do_model_average'], varargs=None, keywords=None, defaults=(None, None, None, 1.0, None, True, None, False)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.DataFeeder.__init__ (ArgSpec(args=['self', 'feed_list', 'place', 'program'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -580,7 +580,7 @@ paddle.fluid.unique_name.switch (ArgSpec(args=['new_generator'], varargs=None, k
 paddle.fluid.unique_name.guard (ArgSpec(args=['new_generator'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ead717d6d440a1eb11971695cd1727f4'))
 paddle.fluid.recordio_writer.convert_reader_to_recordio_file (ArgSpec(args=['filename', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', '65c7523e86f0c50bb729b01667f36310'))
 paddle.fluid.recordio_writer.convert_reader_to_recordio_files (ArgSpec(args=['filename', 'batch_per_file', 'reader_creator', 'feeder', 'compressor', 'max_num_records', 'feed_order'], varargs=None, keywords=None, defaults=(Compressor.Snappy, 1000, None)), ('document', 'bc643f0f5f1b9db57ff0d8a57d379bd7'))
-paddle.fluid.Scope Scope() -> paddle.fluid.core._Scope
+paddle.fluid.Scope Scope() -> paddle.fluid.core_avx._Scope
 paddle.fluid.install_check.run_check (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', '66b7c84a17ed32fec2df9628367be2b9'))
 paddle.reader.cache (ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None), ('document', '1676886070eb607cb608f7ba47be0d3c'))
 paddle.reader.map_readers (ArgSpec(args=['func'], varargs='readers', keywords=None, defaults=None), ('document', '77cbadb09df588e21e5cc0819b69c87d'))

--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -145,7 +145,12 @@ static inline int PlaceIndex(const PlaceType &p) {
  return static_cast<int>(paddle::platform::Place(p).which());
 }

-PYBIND11_MODULE(core, m) {
+#ifdef PADDLE_WITH_AVX
+PYBIND11_MODULE(core_avx, m) {
+#else
+PYBIND11_MODULE(core_noavx, m) {
+#endif
+
  // Not used, just make sure cpu_info.cc is linked.
  paddle::platform::CpuTotalPhysicalMemory();


--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -52,10 +52,7 @@ function init() {
    fi
 }

-function cmake_gen() {
-    mkdir -p ${PADDLE_ROOT}/build
-    cd ${PADDLE_ROOT}/build
-
+function cmake_base() {
    # build script will not fail if *.deb does not exist
    rm *.deb 2>/dev/null || true
    # delete previous built whl packages
@@ -227,6 +224,7 @@ EOF
        -DWITH_MKL=${WITH_MKL:-ON} \
        -DWITH_NGRAPH=${WITH_NGRAPH:-OFF} \
        -DWITH_AVX=${WITH_AVX:-OFF} \
+        -DNOAVX_CORE_FILE=${NOAVX_CORE_FILE:-""} \
        -DWITH_GOLANG=${WITH_GOLANG:-OFF} \
        -DCUDA_ARCH_NAME=${CUDA_ARCH_NAME:-All} \
        -DCUDA_ARCH_BIN=${CUDA_ARCH_BIN} \
@@ -249,6 +247,12 @@ EOF

 }

+function cmake_gen() {
+    mkdir -p ${PADDLE_ROOT}/build
+    cd ${PADDLE_ROOT}/build
+    cmake_base $1
+}
+
 function abort(){
    echo "Your change doesn't follow PaddlePaddle's code style." 1>&2
    echo "Please use pre-commit to check what is wrong." 1>&2
@@ -287,6 +291,17 @@ function check_style() {
 #              Build
 #=================================================

+function build_base() {
+    parallel_number=`nproc`
+    if [[ "$1" != "" ]]; then
+      parallel_number=$1
+    fi
+    make clean
+    make -j ${parallel_number}
+    make install -j `nproc`
+}
+
+
 function build() {
    mkdir -p ${PADDLE_ROOT}/build
    cd ${PADDLE_ROOT}/build
@@ -295,13 +310,7 @@ function build() {
    Building in /paddle/build ...
    ============================================
 EOF
-    parallel_number=`nproc`
-    if [[ "$1" != "" ]]; then
-      parallel_number=$1
-    fi
-    make clean
-    make -j ${parallel_number}
-    make install -j `nproc`
+    build_base $1
 }

 function build_mac() {
@@ -334,6 +343,25 @@ EOF
    fi
 }

+
+function combine_avx_noavx_build() {
+    mkdir -p ${PADDLE_ROOT}/build.noavx
+    cd ${PADDLE_ROOT}/build.noavx
+    WITH_AVX=OFF
+    cmake_base ${PYTHON_ABI:-""}
+    build_base
+
+    # build combined one
+    mkdir -p ${PADDLE_ROOT}/build
+    cd ${PADDLE_ROOT}/build
+    NOAVX_CORE_FILE=`find ${PADDLE_ROOT}/build.noavx/python/paddle/fluid/ -name "core_noavx.*"`
+    WITH_AVX=ON
+
+    cmake_base ${PYTHON_ABI:-""}
+    build_base
+}
+
+
 function run_brpc_test() {
    mkdir -p ${PADDLE_ROOT}/build
    cd ${PADDLE_ROOT}/build
@@ -637,9 +665,7 @@ function card_test() {
    set +m
 }

-function parallel_test() {
-    mkdir -p ${PADDLE_ROOT}/build
-    cd ${PADDLE_ROOT}/build
+function parallel_test_base() {
    if [ ${WITH_TESTING:-ON} == "ON" ] ; then
    cat <<EOF
    ========================================
@@ -710,6 +736,12 @@ set -ex
    fi
 }

+function parallel_test() {
+    mkdir -p ${PADDLE_ROOT}/build
+    cd ${PADDLE_ROOT}/build
+    parallel_test_base
+}
+
 function gen_doc_lib() {
    mkdir -p ${PADDLE_ROOT}/build
    cd ${PADDLE_ROOT}/build
@@ -807,7 +839,7 @@ EOF
    # run paddle version to install python packages first
    RUN apt-get update && ${NCCL_DEPS}
    RUN apt-get install -y wget python3 python3-pip libgtk2.0-dev dmidecode python3-tk && \
-        pip3 install opencv-python && pip3 install /*.whl; apt-get install -f -y && \
+        pip3 install opencv-python x86cpu==0.4 && pip3 install /*.whl; apt-get install -f -y && \
        apt-get clean -y && \
        rm -f /*.whl && \
        ${PADDLE_VERSION} && \
@@ -961,6 +993,13 @@ function main() {
        gen_dockerfile ${PYTHON_ABI:-""}
        assert_api_spec_approvals
        ;;
+      combine_avx_noavx)
+        combine_avx_noavx_build
+        ;;
+      combine_avx_noavx_build_and_test)
+        combine_avx_noavx_build
+        parallel_test_base
+        ;;
      test)
        parallel_test
        ;;

--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -10,19 +10,60 @@ else()
  SET(PACKAGE_NAME "paddlepaddle")
 endif()

+set(FLUID_CORE_NAME "core")
+if(WITH_AVX AND AVX_FOUND)
+  set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_avx")
+  if(NOT DEFINED NOAVX_CORE_FILE OR NOAVX_CORE_FILE STREQUAL "")
+    message(WARNING "You are building AVX version without NOAVX core, \
+      and the wheel package may fail on NOAVX machine.")
+  endif()
+
+  if(NOAVX_CORE_FILE AND NOT EXISTS "${NOAVX_CORE_FILE}")
+    message(FATAL_ERROR "The file ${NOAVX_CORE_FILE} does not exist!")
+  endif()
+
+  set(HAS_NOAVX_CORE ON)
+else()
+  set(FLUID_CORE_NAME "${FLUID_CORE_NAME}_noavx")
+endif()
+
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
    ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+
+set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/)
+
 IF(WIN32)
    # Python would use the .pyd by default under Windows series platform
-    set(FLUID_DST_DIR ${PADDLE_BINARY_DIR}/python/paddle/fluid/)
-    set(FLUID_CORE ${FLUID_DST_DIR}/core.pyd)
+    set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.pyd)
+    set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.pyd)
 ELSE()
-    set(FLUID_CORE ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so)
+    set(FLUID_CORE ${FLUID_DST_DIR}/${FLUID_CORE_NAME}.so)
+    set(FLUID_NOAVX_CORE ${FLUID_DST_DIR}/core_noavx.so)
 ENDIF()
+
+set(FLUID_CORE_DEPS ${FLUID_CORE})
+
+if(HAS_NOAVX_CORE AND EXISTS "${NOAVX_CORE_FILE}")
+  get_filename_component(NOAVX_CORE_NAME ${NOAVX_CORE_FILE} NAME)
+  get_filename_component(NOAVX_CORE_EXT ${NOAVX_CORE_FILE} EXT)
+  if(WIN32)
+    if(NOT NOAVX_CORE_EXT STREQUAL ".pyd")
+      message(FATAL_ERROR "Wrong file ${NOAVX_CORE_NAME}, the ext does not match windows *.pyd!")
+    endif()
+  else()
+    if(NOT NOAVX_CORE_EXT STREQUAL ".so")
+      message(FATAL_ERROR "Wrong file ${NOAVX_CORE_NAME}, the ext does not match *.so!")
+    endif()
+  endif()
+  add_custom_command(OUTPUT ${FLUID_NOAVX_CORE}
+    COMMAND cmake -E copy ${NOAVX_CORE_FILE} ${FLUID_NOAVX_CORE} DEPENDS paddle_pybind)
+  list(APPEND FLUID_CORE_DEPS ${FLUID_NOAVX_CORE})
+endif()
+
 add_custom_command(OUTPUT ${FLUID_CORE}
        COMMAND cmake -E copy $<TARGET_FILE:paddle_pybind> ${FLUID_CORE}
        DEPENDS paddle_pybind)
-add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE})
+add_custom_target(copy_paddle_pybind ALL DEPENDS ${FLUID_CORE_DEPS})

 IF(WIN32)
    add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp

--- a/python/paddle/fluid/core.py
+++ b/python/paddle/fluid/core.py
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import sys
+import os
+from x86cpu import info as cpuinfo
+
+try:
+    if os.name == 'nt':
+        third_lib_path = os.path.abspath(os.path.dirname(
+            __file__)) + os.sep + '..' + os.sep + 'libs'
+        os.environ['path'] += ';' + third_lib_path
+        sys.path.append(third_lib_path)
+
+except ImportError as e:
+    from .. import compat as cpt
+    if os.name == 'nt':
+        executable_path = os.path.abspath(os.path.dirname(sys.executable))
+        raise ImportError(
+            """NOTE: You may need to run \"set PATH=%s;%%PATH%%\"
+        if you encounters \"DLL load failed\" errors. If you have python
+        installed in other directory, replace \"%s\" with your own
+        directory. The original error is: \n %s""" %
+            (executable_path, executable_path, cpt.get_exception_message(e)))
+    else:
+        raise ImportError(
+            """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
+        if you encounters \"libmkldnn.so not found\" errors. If you have python
+        installed in other directory, replace \"/usr/local/lib\" with your own
+        directory. The original error is: \n""" + cpt.get_exception_message(e))
+except Exception as e:
+    raise e
+
+load_noavx = False
+if cpuinfo.supports_avx:
+    try:
+        from .core_avx import *
+        from .core_avx import __doc__, __file__, __name__, __package__
+        from .core_avx import __unittest_throw_exception__
+        from .core_avx import _append_python_callable_object_and_return_id
+        from .core_avx import _cleanup, _Scope
+        from .core_avx import _get_use_default_grad_op_desc_maker_ops
+        from .core_avx import _is_program_version_supported
+        from .core_avx import _set_eager_deletion_mode
+        from .core_avx import _set_fuse_parameter_group_size
+        from .core_avx import _set_fuse_parameter_memory_size
+    except ImportError as error:
+        sys.stderr.write(
+            error.__class__.__name__ +
+            ' WARNING: Error importing avx core. You may not build with AVX, '
+            'but AVX is supported on local machine, you could build paddle '
+            'WITH_AVX=ON to get better performance. ')
+        load_noavx = True
+else:
+    load_noavx = True
+
+if load_noavx:
+    try:
+        from .core_noavx import *
+        from .core_noavx import __doc__, __file__, __name__, __package__
+        from .core_noavx import __unittest_throw_exception__
+        from .core_noavx import _append_python_callable_object_and_return_id
+        from .core_noavx import _cleanup, _Scope
+        from .core_noavx import _get_use_default_grad_op_desc_maker_ops
+        from .core_noavx import _is_program_version_supported
+        from .core_noavx import _set_eager_deletion_mode
+        from .core_noavx import _set_fuse_parameter_group_size
+        from .core_noavx import _set_fuse_parameter_memory_size
+    except ImportError as error:
+        sys.exit("Error: Can not load core_noavx.* ." +
+                 error.__class__.__name__)
+        load_noavx = True
--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -30,32 +30,8 @@ import multiprocessing

 from .. import compat as cpt
 from .proto import framework_pb2
-try:
-    if os.name == 'nt':
-        import sys
-        third_lib_path = os.path.abspath(os.path.dirname(
-            __file__)) + os.sep + '..' + os.sep + 'libs'
-        os.environ['path'] += ';' + third_lib_path
-        sys.path.append(third_lib_path)
-
-    from . import core
-except ImportError as e:
-    if os.name == 'nt':
-        executable_path = os.path.abspath(os.path.dirname(sys.executable))
-        raise ImportError(
-            """NOTE: You may need to run \"set PATH=%s;%%PATH%%\"
-        if you encounters \"DLL load failed\" errors. If you have python
-        installed in other directory, replace \"%s\" with your own
-        directory. The original error is: \n %s""" %
-            (executable_path, executable_path, cpt.get_exception_message(e)))
-    else:
-        raise ImportError(
-            """NOTE: You may need to run \"export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH\"
-        if you encounters \"libmkldnn.so not found\" errors. If you have python
-        installed in other directory, replace \"/usr/local/lib\" with your own
-        directory. The original error is: \n""" + cpt.get_exception_message(e))
-except Exception as e:
-    raise e
+
+from . import core
 from . import unique_name

 __all__ = [

--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -13,3 +13,4 @@ funcsigs
 pyyaml
 decorator
 prettytable
+x86cpu==0.4
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -145,7 +145,9 @@ if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
 paddle_bins = ''
 if not '${WIN32}':
    paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle']
-package_data={'paddle.fluid': ['core' + (ext_name if os.name != 'nt' else '.pyd')]}
+package_data={'paddle.fluid': ['${FLUID_CORE_NAME}' + (ext_name if os.name != 'nt' else '.pyd')]}
+if '${HAS_NOAVX_CORE}' == 'ON':
+    package_data['paddle.fluid'] += ['core_noavx' + (ext_name if os.name != 'nt' else '.pyd')]

 package_dir={
    '': '${PADDLE_BINARY_DIR}/python',
@@ -213,19 +215,19 @@ if os.path.isfile(libs_path+'/__init__.py'):
    os.remove(libs_path+'/__init__.py')
 package_dir['paddle.libs']=libs_path

-# change rpath of core.ext, add $ORIGIN/../libs/ to it.
+# change rpath of ${FLUID_CORE_NAME}.ext, add $ORIGIN/../libs/ to it.
 # The reason is that libwarpctc.ext, libiomp5.ext etc are in paddle.libs, and
-# core.ext is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries.
+# ${FLUID_CORE_NAME}.ext is in paddle.fluid, thus paddle/fluid/../libs will pointer to above libraries.
 # This operation will fix https://github.com/PaddlePaddle/Paddle/issues/3213
 if '${CMAKE_BUILD_TYPE}' == 'Release':
    if os.name != 'nt':
-        # only change rpath in Release mode, since in Debug mode, core.xx is too large to be changed.
+        # only change rpath in Release mode, since in Debug mode, ${FLUID_CORE_NAME}.xx is too large to be changed.
        if "@APPLE@" == "1":
-            command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/core" + ext_name
+            command = "install_name_tool -id \"@loader_path/../libs/\" ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + ext_name
        else:
-            command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core" + ext_name
+            command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/${FLUID_CORE_NAME}" + ext_name
        if os.system(command) != 0:
-            raise Exception("patch core.%s failed, command: %s" % (ext_name, command))
+            raise Exception("patch ${FLUID_CORE_NAME}.%s failed, command: %s" % (ext_name, command))

 ext_modules = [Extension('_foo', ['stub.cc'])]
 if os.name == 'nt':