diff --git a/CMakeLists.txt b/CMakeLists.txt
index 49ccb815c995d6ab69fcabfc9eaeba65241ab2ce..290310858fbe52718fba0e1d5357c80b61936f71 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -249,7 +249,7 @@ option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
 option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
 option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
 option(ON_INFER "Turn on inference optimization and inference-lib generation"
-       OFF)
+       ON)
 ################################ Internal Configurations #######################################
 option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
 option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index 94fb1b4d838f9a241c12f3730138dfb0ac9bdf25..bfba3dfbac404837faaccc0fba5b2672f7190c12 100755
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -236,7 +236,7 @@ endif()
 if(WIN32
    OR APPLE
    OR NOT WITH_GPU
-   OR ON_INFER)
+   OR (ON_INFER AND NOT WITH_PYTHON))
   set(WITH_DGC OFF)
 endif()
 
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc
index 8ee7065368c2181475e233f5e9dcbd8e828dea49..c40a80ce0752c5f77e85851eb084e4fdbcd42430 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.cc
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc
@@ -21,17 +21,8 @@
 
 #include "paddle/phi/core/utils/rw_lock.h"
 
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
-#endif
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
index 82eb237e73d18f259c600b1753a6120cc5d5741a..2df8892f5bd8aac45ad0af4cce3aead84da683c5 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -24,17 +24,8 @@
 #include "paddle/fluid/platform/event.h"
 #include "paddle/phi/core/utils/rw_lock.h"
 
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
-#endif
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc
index 87312cbfde2b9539ee731b13d5684fccdb1d1949..88ffeb59503d3566944c0ebcc3e861a4ba96cf97 100644
--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -25,21 +25,10 @@ PADDLE_DEFINE_EXPORTED_bool(
     "Delete local scope eagerly. It will reduce GPU memory usage but "
     "slow down the destruction of variables.(around 1% performance harm)");
 
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_KIDS_READER_LOCK
-#define SCOPE_KIDS_WRITER_LOCK
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_KIDS_READER_LOCK phi::AutoRDLock auto_lock(&kids_lock_);
 #define SCOPE_KIDS_WRITER_LOCK phi::AutoWRLock auto_lock(&kids_lock_);
 #define SCOPE_VARS_READER_LOCK phi::AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK phi::AutoWRLock auto_lock(&vars_lock_);
-#endif
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h
index cf939a5698ba8d7de9f6c61da3e7933f715d3b76..7f08fc9b4e22c340469a277940ea10d7933fb009 100644
--- a/paddle/fluid/framework/scope.h
+++ b/paddle/fluid/framework/scope.h
@@ -179,12 +179,9 @@ class Scope : public ScopeBase {
 
   DISABLE_COPY_AND_ASSIGN(Scope);
 
-#ifndef PADDLE_ON_INFERENCE
-
  private:
   mutable phi::RWLock kids_lock_;
   mutable phi::RWLock vars_lock_;
-#endif
 };
 
 // Generate some debug string about the inherience structure of scope, quite
diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc
index 28dddc1fbebdd6b9885a1dcf1659cda79daf898d..dd66ab179b26d1f3faf4636e027c80f2eab04ce6 100644
--- a/paddle/fluid/platform/flags.cc
+++ b/paddle/fluid/platform/flags.cc
@@ -388,11 +388,7 @@ PADDLE_DEFINE_EXPORTED_int32(
  *       enable garbage collection strategy when training large networks.
  */
-// Disable gc by default when inference library is built
+// The default no longer depends on whether the inference library is built.
-#ifdef PADDLE_ON_INFERENCE
-static const double kDefaultEagerDeleteTensorGB = -1;
-#else
 static const double kDefaultEagerDeleteTensorGB = 0;
-#endif
 
 PADDLE_DEFINE_EXPORTED_double(
     eager_delete_tensor_gb,
@@ -663,11 +659,7 @@ PADDLE_DEFINE_EXPORTED_bool(use_mkldnn, false, "Use MKLDNN to run");
  * If FLAGS_call_stack_level == 2, the python stack, c++ stack, and error
  * message summary will be shown.
  */
-#ifdef PADDLE_ON_INFERENCE
-static const int32_t kDefaultCallStackLevel = 2;
-#else
 static const int32_t kDefaultCallStackLevel = 1;
-#endif
 
 PADDLE_DEFINE_EXPORTED_int32(
     call_stack_level,
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index b784affc07e7ec5d278541bd066db281e4ca7ec5..72885c0bbe5b7ece2dd62a721913d12e1739376f 100755
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -140,7 +140,7 @@ if(WITH_CUSTOM_DEVICE)
   set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry)
 endif()
 
-if(NOT ON_INFER)
+if(WITH_PYTHON)
   set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer)
   if(WITH_NCCL OR WITH_RCCL)
     set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl)
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index b366517cafa63da5a864f9f9c3d6b076e6b7b299..0044f037fc01714705ef94a4d1e6e96aa5b5519e 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -2547,7 +2547,7 @@ All parameter, weight, gradient are variables in Paddle.
   BindCompatible(&m);
   BindDataset(&m);
   BindGenerator(&m);
-#ifndef PADDLE_ON_INFERENCE
+#ifndef PADDLE_NO_PYTHON
   BindDistributed(&m);
 #endif
 #ifdef PADDLE_WITH_ASCEND
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 58debc39dd9e90cbd33170b800e6534c4995af74..bdd6d375bfd04874003cb09693c986e57c378111 100755
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -799,7 +799,7 @@ py_test_modules(
 # it is found that windows CI will run all the training unittests with the ON_INFER option turned on,
 # which will not appear in other CIs. The calculation behavior of some ops in inference mode is
 # inconsistent with that in non-inference mode.
-if(NOT ON_INFER)
+if(WITH_PYTHON)
   py_test_modules(test_parallel_executor_seresnext_base_cpu MODULES
                   test_parallel_executor_seresnext_base_cpu)
   py_test_modules(test_parallel_executor_seresnext_with_reduce_cpu MODULES
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt
index e1611d524ab8ea9eba88f46c6dcba04e800d401b..dba92870d3ab72813b7e03cf28d2f5d8516b8e01 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt
@@ -34,7 +34,7 @@ set(TEST_EAGER_OPS
 list(REMOVE_ITEM TEST_OPS test_lac)
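-# NOTE(Aurelius84): In case of Windows CI, if open ON_INFER, RWLOCK of Scope
-# will be removed and will cause some random failed in multi-thread.
+# NOTE(Aurelius84): test_lac used to fail randomly in multi-thread when ON_INFER
+# removed the RWLOCK of Scope; the lock is always kept now, so only gate on Python.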
-if(NOT ON_INFER)
+if(WITH_PYTHON)
   py_test_modules(test_lac MODULES test_lac ENVS FLAGS_enable_eager_mode=1)
   set_tests_properties(test_lac PROPERTIES TIMEOUT 120)
 endif()