diff --git a/CMakeLists.txt b/CMakeLists.txt
index c75b83e50cf9cef8290c37f88b38cdc3d77df39c..dcd1218a5b0b62f2739b727391aca31b48ed9ccb 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,8 +36,8 @@ include(simd)
 ################################ Configurations #######################################
 option(WITH_GPU         "Compile PaddlePaddle with NVIDIA GPU"          ${CUDA_FOUND})
 option(WITH_AVX         "Compile PaddlePaddle with AVX intrinsics"      ${AVX_FOUND})
-option(WITH_MKLDNN      "Compile PaddlePaddle with mkl-dnn support."    OFF)
-option(WITH_MKLML       "Compile PaddlePaddle with mklml package."      OFF)
+option(WITH_MKLDNN      "Compile PaddlePaddle with mkl-dnn support."    ${AVX_FOUND})
+option(WITH_MKLML       "Compile PaddlePaddle with mklml package."      ${AVX_FOUND})
 option(WITH_DSO         "Compile PaddlePaddle with dynamic linked CUDA" ON)
 option(WITH_TESTING     "Compile PaddlePaddle with unit testing"        ON)
 option(WITH_SWIG_PY     "Compile PaddlePaddle with inference api"       ON)
diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake
index db09232c0e69016bf18c1d981e4620e9e804ff7c..0eeccbf7d8a1df17351c8914df6dabf005802787 100644
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -73,10 +73,20 @@ INCLUDE_DIRECTORIES(${CBLAS_INC_DIR})
 # linear algebra libraries for cc_library(xxx SRCS xxx.c DEPS cblas)
 SET(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/cblas_dummy.c)
 FILE(WRITE ${dummyfile} "const char * dummy = \"${dummyfile}\";")
-ADD_LIBRARY(cblas STATIC ${dummyfile})
+# MKL-based providers get a SHARED cblas wrapper; quoted so an empty provider is not a syntax error.
+IF("${CBLAS_PROVIDER}" MATCHES "MKL")
+    ADD_LIBRARY(cblas SHARED ${dummyfile})
+ELSE()
+    ADD_LIBRARY(cblas STATIC ${dummyfile})
+ENDIF()
 TARGET_LINK_LIBRARIES(cblas ${CBLAS_LIBRARIES})
 
 IF(NOT ${CBLAS_FOUND})
     ADD_DEPENDENCIES(cblas extern_openblas)
     LIST(APPEND external_project_dependencies cblas)
+ELSE()
+    # When MKLML is the CBLAS provider, order the cblas target after the mklml target.
+    IF("${CBLAS_PROVIDER}" STREQUAL "MKLML")
+        ADD_DEPENDENCIES(cblas mklml)
+    ENDIF()
 ENDIF(NOT ${CBLAS_FOUND})
diff --git a/paddle/operators/math/CMakeLists.txt b/paddle/operators/math/CMakeLists.txt
index abcaf940ab0128d6948acc620d678632c8f48960..ed51d416ed9497eee45ba826ad672b8fb1ad3678 100644
--- a/paddle/operators/math/CMakeLists.txt
+++ b/paddle/operators/math/CMakeLists.txt
@@ -1,13 +1,8 @@
-if(WITH_MKLML)
-    set(BLAS_LIB mklml)
-else()
-    set(BLAS_LIB cblas)
-endif()
 
 if(WITH_GPU)
-    nv_library(math_function SRCS math_function.cc math_function.cu DEPS ${BLAS_LIB} device_context)
+    nv_library(math_function SRCS math_function.cc math_function.cu DEPS cblas device_context)
 else()
-    cc_library(math_function SRCS math_function.cc DEPS ${BLAS_LIB} device_context)
+    cc_library(math_function SRCS math_function.cc DEPS cblas device_context)
 endif()
 
 nv_test(math_function_test SRCS math_function_test.cc DEPS math_function tensor)
diff --git a/paddle/scripts/submit_local.sh.in b/paddle/scripts/submit_local.sh.in
index 12bf629ea920832f96bc5f7cc0b38abfddd34d97..2ab7d5b52f68641999603d8b675d8fabf24fd574 100755
--- a/paddle/scripts/submit_local.sh.in
+++ b/paddle/scripts/submit_local.sh.in
@@ -18,6 +18,8 @@ function version(){
         echo "PaddlePaddle @PADDLE_VERSION@, compiled with"
         echo "    with_avx: @WITH_AVX@"
         echo "    with_gpu: @WITH_GPU@"
+        echo "    with_mkldnn: @WITH_MKLDNN@"
+        echo "    with_mklml: @WITH_MKLML@"
         echo "    with_double: @WITH_DOUBLE@"
         echo "    with_python: @WITH_PYTHON@"
         echo "    with_rdma: @WITH_RDMA@"
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 16c519d45aa62694201379b8da1ca54d8a07ee9a..d2f064bea00ff55f29df9269c2aab7619f2a94c5 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -21,6 +21,18 @@ if(WITH_GOLANG)
   add_dependencies(copy_paddle_master paddle_master)
 endif(WITH_GOLANG)
 
+# MKL runtime libraries handed to setup.py.in, and the dependencies paddle_python waits on.
+set(MKL_SHARED_LIBS "")
+set(MKL_DEPENDS "")
+if(WITH_MKLML)
+  list(APPEND MKL_DEPENDS mklml)
+  list(APPEND MKL_SHARED_LIBS ${MKLML_LIB} ${MKLML_IOMP_LIB})
+endif()
+if(WITH_MKLDNN)
+  list(APPEND MKL_DEPENDS mkldnn)
+  list(APPEND MKL_SHARED_LIBS "${MKLDNN_LIB}" "${MKLDNN_LIB}.0")
+endif()
+
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.in
     ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
 
@@ -39,7 +51,7 @@ add_custom_command(OUTPUT ${PADDLE_PYTHON_BUILD_DIR}/.timestamp
     DEPENDS gen_proto_py copy_paddle_pybind framework_py_proto ${PY_FILES} ${external_project_dependencies} ${COPY_PADDLE_MASTER})
 
 add_custom_target(paddle_python ALL DEPENDS
-    ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel)
+    ${PADDLE_PYTHON_BUILD_DIR}/.timestamp paddle_pserver_main paddle_trainer paddle_merge_model python_api_wheel ${MKL_DEPENDS})
 
 set(PADDLE_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
 
diff --git a/python/setup.py.in b/python/setup.py.in
index 38728aa2fd77cf3c882479ed83e99688b9ffa541..287442e013f91df1eed9c629b7767a660d5e30d7 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -23,6 +23,16 @@ with open('@PADDLE_SOURCE_DIR@/python/requirements.txt') as f:
 if '${CMAKE_SYSTEM_PROCESSOR}' not in ['arm', 'armv7-a', 'aarch64']:
     setup_requires+=["opencv-python"]
 
+# data_files destinations are relative to the install prefix (sys.prefix), expected to be /usr
+paddle_bin_dir = 'local/opt/paddle/bin'
+paddle_bins = ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle_usage',
+               '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
+               '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
+               '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main']
+
+paddle_rt_lib_dir = 'local/lib'
+paddle_rt_libs = '${MKL_SHARED_LIBS}'.split(';') if '${MKL_SHARED_LIBS}' else []
+
 setup(name='paddlepaddle',
       version='${PADDLE_VERSION}',
       description='Parallel Distributed Deep Learning',
@@ -42,9 +52,6 @@ setup(name='paddlepaddle',
       },
       scripts=['${PADDLE_BINARY_DIR}/paddle/scripts/paddle'],
       distclass=BinaryDistribution,
-      data_files=[('/usr/local/opt/paddle/bin',
-                       ['${PADDLE_BINARY_DIR}/paddle/scripts/paddle_usage',
-                        '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_trainer',
-                        '${PADDLE_BINARY_DIR}/paddle/trainer/paddle_merge_model',
-                        '${PADDLE_BINARY_DIR}/paddle/pserver/paddle_pserver_main'])]
+      data_files=[(paddle_bin_dir, paddle_bins),
+                  (paddle_rt_lib_dir, paddle_rt_libs)]
 )