diff --git a/CMakeLists.txt b/CMakeLists.txt index 49ccb815c995d6ab69fcabfc9eaeba65241ab2ce..290310858fbe52718fba0e1d5357c80b61936f71 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -249,7 +249,7 @@ option(WITH_SYSTEM_BLAS "Use system blas library" OFF) option(WITH_DISTRIBUTE "Compile with distributed support" OFF) option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(ON_INFER "Turn on inference optimization and inference-lib generation" - OFF) + ON) ################################ Internal Configurations ####################################### option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF) option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index 94fb1b4d838f9a241c12f3730138dfb0ac9bdf25..bfba3dfbac404837faaccc0fba5b2672f7190c12 100755 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -236,7 +236,7 @@ endif() if(WIN32 OR APPLE OR NOT WITH_GPU - OR ON_INFER) + OR (ON_INFER AND NOT WITH_PYTHON)) set(WITH_DGC OFF) endif() diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc index 8ee7065368c2181475e233f5e9dcbd8e828dea49..c40a80ce0752c5f77e85851eb084e4fdbcd42430 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.cc +++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc @@ -21,17 +21,8 @@ #include "paddle/phi/core/utils/rw_lock.h" -// When in inference scenario, the scopes will not be written by two threads in -// a mean time, but a scope may be read by multiple threads concurrently, and -// the mutex will cause serious performance issue. -// So the mutex is disabled when `ON_INFER`. -#ifdef PADDLE_ON_INFERENCE -#define SCOPE_VARS_READER_LOCK -#define SCOPE_VARS_WRITER_LOCK -#else #define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_); #define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_); -#endif namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h index 82eb237e73d18f259c600b1753a6120cc5d5741a..2df8892f5bd8aac45ad0af4cce3aead84da683c5 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.h +++ b/paddle/fluid/framework/new_executor/new_executor_defs.h @@ -24,17 +24,8 @@ #include "paddle/fluid/platform/event.h" #include "paddle/phi/core/utils/rw_lock.h" -// When in inference scenario, the scopes will not be written by two threads in -// a mean time, but a scope may be read by multiple threads concurrently, and -// the mutex will cause serious performance issue. -// So the mutex is disabled when `ON_INFER`. -#ifdef PADDLE_ON_INFERENCE -#define SCOPE_VARS_READER_LOCK -#define SCOPE_VARS_WRITER_LOCK -#else #define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_); #define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_); -#endif namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc index 87312cbfde2b9539ee731b13d5684fccdb1d1949..88ffeb59503d3566944c0ebcc3e861a4ba96cf97 100644 --- a/paddle/fluid/framework/scope.cc +++ b/paddle/fluid/framework/scope.cc @@ -25,21 +25,10 @@ PADDLE_DEFINE_EXPORTED_bool( "Delete local scope eagerly. It will reduce GPU memory usage but " "slow down the destruction of variables.(around 1% performance harm)"); -// When in inference scenario, the scopes will not be written by two threads in -// a mean time, but a scope may be read by multiple threads concurrently, and -// the mutex will cause serious performance issue. -// So the mutex is disabled when `ON_INFER`. -#ifdef PADDLE_ON_INFERENCE -#define SCOPE_KIDS_READER_LOCK -#define SCOPE_KIDS_WRITER_LOCK -#define SCOPE_VARS_READER_LOCK -#define SCOPE_VARS_WRITER_LOCK -#else #define SCOPE_KIDS_READER_LOCK phi::AutoRDLock auto_lock(&kids_lock_); #define SCOPE_KIDS_WRITER_LOCK phi::AutoWRLock auto_lock(&kids_lock_); #define SCOPE_VARS_READER_LOCK phi::AutoRDLock auto_lock(&vars_lock_); #define SCOPE_VARS_WRITER_LOCK phi::AutoWRLock auto_lock(&vars_lock_); -#endif namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h index cf939a5698ba8d7de9f6c61da3e7933f715d3b76..7f08fc9b4e22c340469a277940ea10d7933fb009 100644 --- a/paddle/fluid/framework/scope.h +++ b/paddle/fluid/framework/scope.h @@ -179,12 +179,9 @@ class Scope : public ScopeBase { DISABLE_COPY_AND_ASSIGN(Scope); -#ifndef PADDLE_ON_INFERENCE - private: mutable phi::RWLock kids_lock_; mutable phi::RWLock vars_lock_; -#endif }; // Generate some debug string about the inherience structure of scope, quite diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc index 28dddc1fbebdd6b9885a1dcf1659cda79daf898d..dd66ab179b26d1f3faf4636e027c80f2eab04ce6 100644 --- a/paddle/fluid/platform/flags.cc +++ b/paddle/fluid/platform/flags.cc @@ -388,11 +388,7 @@ PADDLE_DEFINE_EXPORTED_int32( * enable garbage collection strategy when training large networks. */ // Disable gc by default when inference library is built -#ifdef PADDLE_ON_INFERENCE -static const double kDefaultEagerDeleteTensorGB = -1; -#else static const double kDefaultEagerDeleteTensorGB = 0; -#endif PADDLE_DEFINE_EXPORTED_double( eager_delete_tensor_gb, @@ -663,11 +659,7 @@ PADDLE_DEFINE_EXPORTED_bool(use_mkldnn, false, "Use MKLDNN to run"); * If FLAGS_call_stack_level == 2, the python stack, c++ stack, and error * message summary will be shown. */ -#ifdef PADDLE_ON_INFERENCE -static const int32_t kDefaultCallStackLevel = 2; -#else static const int32_t kDefaultCallStackLevel = 1; -#endif PADDLE_DEFINE_EXPORTED_int32( call_stack_level, diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index b784affc07e7ec5d278541bd066db281e4ca7ec5..72885c0bbe5b7ece2dd62a721913d12e1739376f 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -140,7 +140,7 @@ if(WITH_CUSTOM_DEVICE) set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry) endif() -if(NOT ON_INFER) +if(WITH_PYTHON) set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer) if(WITH_NCCL OR WITH_RCCL) set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index b366517cafa63da5a864f9f9c3d6b076e6b7b299..0044f037fc01714705ef94a4d1e6e96aa5b5519e 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -2547,7 +2547,7 @@ All parameter, weight, gradient are variables in Paddle. BindCompatible(&m); BindDataset(&m); BindGenerator(&m); -#ifndef PADDLE_ON_INFERENCE +#ifndef PADDLE_NO_PYTHON BindDistributed(&m); #endif #ifdef PADDLE_WITH_ASCEND diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 58debc39dd9e90cbd33170b800e6534c4995af74..bdd6d375bfd04874003cb09693c986e57c378111 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -799,7 +799,7 @@ py_test_modules( # it is found that windows CI will run all the training unittests with the ON_INFER option turned on, # which will not appear in other CIs. The calculation behavior of some ops in inference mode is # inconsistent with that in non-inference mode. -if(NOT ON_INFER) +if(WITH_PYTHON) py_test_modules(test_parallel_executor_seresnext_base_cpu MODULES test_parallel_executor_seresnext_base_cpu) py_test_modules(test_parallel_executor_seresnext_with_reduce_cpu MODULES diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt index e1611d524ab8ea9eba88f46c6dcba04e800d401b..dba92870d3ab72813b7e03cf28d2f5d8516b8e01 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt @@ -34,7 +34,7 @@ set(TEST_EAGER_OPS list(REMOVE_ITEM TEST_OPS test_lac) # NOTE(Aurelius84): In case of Windows CI, if open ON_INFER, RWLOCK of Scope # will be removed and will cause some random failed in multi-thread. -if(NOT ON_INFER) +if(WITH_PYTHON) py_test_modules(test_lac MODULES test_lac ENVS FLAGS_enable_eager_mode=1) set_tests_properties(test_lac PROPERTIES TIMEOUT 120) endif()