From cbe64cc1cfc6d7883ce1eadebf365273bd24e352 Mon Sep 17 00:00:00 2001
From: JingZhuangzhuang <75348594+JZZ-NOTE@users.noreply.github.com>
Date: Wed, 14 Sep 2022 20:39:01 +0800
Subject: [PATCH] Support inference compilation in training package (#46008)

* merge python lib

* Update third_party.cmake

* Update CMakeLists.txt
---
 CMakeLists.txt                                         |  2 +-
 cmake/third_party.cmake                                |  2 +-
 .../fluid/framework/new_executor/new_executor_defs.cc  |  9 ---------
 .../fluid/framework/new_executor/new_executor_defs.h   |  9 ---------
 paddle/fluid/framework/scope.cc                        | 11 -----------
 paddle/fluid/framework/scope.h                         |  3 ---
 paddle/fluid/platform/flags.cc                         |  8 --------
 paddle/fluid/pybind/CMakeLists.txt                     |  2 +-
 paddle/fluid/pybind/pybind.cc                          |  2 +-
 python/paddle/fluid/tests/unittests/CMakeLists.txt     |  2 +-
 .../tests/unittests/dygraph_to_static/CMakeLists.txt   |  2 +-
 11 files changed, 6 insertions(+), 46 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 49ccb815c9..290310858f 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -249,7 +249,7 @@ option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
 option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
 option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
 option(ON_INFER "Turn on inference optimization and inference-lib generation"
-       OFF)
+       ON)
 ################################ Internal Configurations #######################################
 option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
 option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index 94fb1b4d83..bfba3dfbac 100755
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -236,7 +236,7 @@ endif()
 
 if(WIN32
    OR APPLE
    OR NOT WITH_GPU
-   OR ON_INFER)
+   OR (ON_INFER AND NOT WITH_PYTHON))
   set(WITH_DGC OFF)
 endif()
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc
index 8ee7065368..c40a80ce07 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.cc
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc
@@ -21,17 +21,8 @@
 
 #include "paddle/phi/core/utils/rw_lock.h"
 
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
-#endif
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.h b/paddle/fluid/framework/new_executor/new_executor_defs.h
index 82eb237e73..2df8892f5b 100644
--- a/paddle/fluid/framework/new_executor/new_executor_defs.h
+++ b/paddle/fluid/framework/new_executor/new_executor_defs.h
@@ -24,17 +24,8 @@
 #include "paddle/fluid/platform/event.h"
 #include "paddle/phi/core/utils/rw_lock.h"
 
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);
-#endif
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/scope.cc b/paddle/fluid/framework/scope.cc
index 87312cbfde..88ffeb5950 100644
--- a/paddle/fluid/framework/scope.cc
+++ b/paddle/fluid/framework/scope.cc
@@ -25,21 +25,10 @@ PADDLE_DEFINE_EXPORTED_bool(
     "Delete local scope eagerly. It will reduce GPU memory usage but "
     "slow down the destruction of variables.(around 1% performance harm)");
 
-// When in inference scenario, the scopes will not be written by two threads in
-// a mean time, but a scope may be read by multiple threads concurrently, and
-// the mutex will cause serious performance issue.
-// So the mutex is disabled when `ON_INFER`.
-#ifdef PADDLE_ON_INFERENCE
-#define SCOPE_KIDS_READER_LOCK
-#define SCOPE_KIDS_WRITER_LOCK
-#define SCOPE_VARS_READER_LOCK
-#define SCOPE_VARS_WRITER_LOCK
-#else
 #define SCOPE_KIDS_READER_LOCK phi::AutoRDLock auto_lock(&kids_lock_);
 #define SCOPE_KIDS_WRITER_LOCK phi::AutoWRLock auto_lock(&kids_lock_);
 #define SCOPE_VARS_READER_LOCK phi::AutoRDLock auto_lock(&vars_lock_);
 #define SCOPE_VARS_WRITER_LOCK phi::AutoWRLock auto_lock(&vars_lock_);
-#endif
 
 namespace paddle {
 namespace framework {
diff --git a/paddle/fluid/framework/scope.h b/paddle/fluid/framework/scope.h
index cf939a5698..7f08fc9b4e 100644
--- a/paddle/fluid/framework/scope.h
+++ b/paddle/fluid/framework/scope.h
@@ -179,12 +179,9 @@ class Scope : public ScopeBase {
 
   DISABLE_COPY_AND_ASSIGN(Scope);
 
-#ifndef PADDLE_ON_INFERENCE
-
  private:
   mutable phi::RWLock kids_lock_;
   mutable phi::RWLock vars_lock_;
-#endif
 };
 
 // Generate some debug string about the inherience structure of scope, quite
diff --git a/paddle/fluid/platform/flags.cc b/paddle/fluid/platform/flags.cc
index 28dddc1fbe..dd66ab179b 100644
--- a/paddle/fluid/platform/flags.cc
+++ b/paddle/fluid/platform/flags.cc
@@ -388,11 +388,7 @@ PADDLE_DEFINE_EXPORTED_int32(
  * enable garbage collection strategy when training large networks.
  */
 // Disable gc by default when inference library is built
-#ifdef PADDLE_ON_INFERENCE
-static const double kDefaultEagerDeleteTensorGB = -1;
-#else
 static const double kDefaultEagerDeleteTensorGB = 0;
-#endif
 
 PADDLE_DEFINE_EXPORTED_double(
     eager_delete_tensor_gb,
@@ -663,11 +659,7 @@ PADDLE_DEFINE_EXPORTED_bool(use_mkldnn, false, "Use MKLDNN to run");
  * If FLAGS_call_stack_level == 2, the python stack, c++ stack, and error
  * message summary will be shown.
  */
-#ifdef PADDLE_ON_INFERENCE
-static const int32_t kDefaultCallStackLevel = 2;
-#else
 static const int32_t kDefaultCallStackLevel = 1;
-#endif
 
 PADDLE_DEFINE_EXPORTED_int32(
     call_stack_level,
diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt
index b784affc07..72885c0bbe 100755
--- a/paddle/fluid/pybind/CMakeLists.txt
+++ b/paddle/fluid/pybind/CMakeLists.txt
@@ -140,7 +140,7 @@ if(WITH_CUSTOM_DEVICE)
   set(PYBIND_DEPS ${PYBIND_DEPS} custom_device_common_op_registry)
 endif()
 
-if(NOT ON_INFER)
+if(WITH_PYTHON)
   set(PYBIND_DEPS ${PYBIND_DEPS} processgroup eager_reducer)
   if(WITH_NCCL OR WITH_RCCL)
     set(PYBIND_DEPS ${PYBIND_DEPS} processgroup_nccl)
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index b366517caf..0044f037fc 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -2547,7 +2547,7 @@ All parameter, weight, gradient are variables in Paddle.
   BindCompatible(&m);
   BindDataset(&m);
   BindGenerator(&m);
-#ifndef PADDLE_ON_INFERENCE
+#ifndef PADDLE_NO_PYTHON
   BindDistributed(&m);
 #endif
 #ifdef PADDLE_WITH_ASCEND
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 58debc39dd..bdd6d375bf 100755
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -799,7 +799,7 @@ py_test_modules(
 # it is found that windows CI will run all the training unittests with the ON_INFER option turned on,
 # which will not appear in other CIs. The calculation behavior of some ops in inference mode is
 # inconsistent with that in non-inference mode.
-if(NOT ON_INFER)
+if(WITH_PYTHON)
   py_test_modules(test_parallel_executor_seresnext_base_cpu MODULES
                   test_parallel_executor_seresnext_base_cpu)
   py_test_modules(test_parallel_executor_seresnext_with_reduce_cpu MODULES
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt
index e1611d524a..dba92870d3 100644
--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/CMakeLists.txt
@@ -34,7 +34,7 @@ set(TEST_EAGER_OPS
 list(REMOVE_ITEM TEST_OPS test_lac)
 # NOTE(Aurelius84): In case of Windows CI, if open ON_INFER, RWLOCK of Scope
 # will be removed and will cause some random failed in multi-thread.
-if(NOT ON_INFER)
+if(WITH_PYTHON)
   py_test_modules(test_lac MODULES test_lac ENVS FLAGS_enable_eager_mode=1)
   set_tests_properties(test_lac PROPERTIES TIMEOUT 120)
 endif()
--
GitLab
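
Note on the core change (a reader's sketch, not part of the patch): with the PADDLE_ON_INFERENCE branches deleted, the SCOPE_VARS_READER_LOCK / SCOPE_VARS_WRITER_LOCK macros always expand to RAII guards, so Scope takes its phi::RWLock members on every variable lookup and mutation even in inference builds. The sketch below shows that pattern under stated assumptions: the RWLock, AutoRDLock, and AutoWRLock classes here are simplified stand-ins built on std::shared_mutex (C++17), not Paddle's actual phi implementations, and the toy Scope stands in for framework::Scope.

#include <iostream>
#include <shared_mutex>
#include <string>
#include <unordered_map>

// Illustrative stand-in for phi::RWLock: shared (reader) and
// exclusive (writer) locking over one std::shared_mutex.
class RWLock {
 public:
  void RDLock() { mu_.lock_shared(); }
  void RDUnlock() { mu_.unlock_shared(); }
  void WRLock() { mu_.lock(); }
  void WRUnlock() { mu_.unlock(); }

 private:
  std::shared_mutex mu_;
};

// RAII guards: lock in the constructor, unlock in the destructor, so a
// single macro expansion at the top of a method guards its whole body.
class AutoRDLock {
 public:
  explicit AutoRDLock(RWLock* lock) : lock_(lock) { lock_->RDLock(); }
  ~AutoRDLock() { lock_->RDUnlock(); }

 private:
  RWLock* lock_;
};

class AutoWRLock {
 public:
  explicit AutoWRLock(RWLock* lock) : lock_(lock) { lock_->WRLock(); }
  ~AutoWRLock() { lock_->WRUnlock(); }

 private:
  RWLock* lock_;
};

// After the patch these are defined unconditionally; before it, an
// #ifdef PADDLE_ON_INFERENCE branch defined them as empty.
#define SCOPE_VARS_READER_LOCK AutoRDLock auto_lock(&vars_lock_);
#define SCOPE_VARS_WRITER_LOCK AutoWRLock auto_lock(&vars_lock_);

// Toy stand-in for framework::Scope: a guarded name -> value map.
class Scope {
 public:
  void SetVar(const std::string& name, int value) {
    SCOPE_VARS_WRITER_LOCK  // exclusive: mutates the map
    vars_[name] = value;
  }

  int FindVar(const std::string& name) const {
    SCOPE_VARS_READER_LOCK  // shared: concurrent readers are fine
    auto it = vars_.find(name);
    return it == vars_.end() ? -1 : it->second;
  }

 private:
  std::unordered_map<std::string, int> vars_;
  mutable RWLock vars_lock_;  // mutable so const readers can lock it
};

int main() {
  Scope scope;
  scope.SetVar("x", 42);
  std::cout << scope.FindVar("x") << "\n";  // prints 42
  return 0;
}

The trade-off the deleted comment described still holds: inference scopes are written by at most one thread, so the old build compiled the guards out to avoid uncontended-lock overhead; this patch accepts that cost so a single package can serve both training and inference.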